Date: (Sun) Aug 02, 2015
Data: Source: Training: https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTrain.csv
New: https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTest.csv
Time period:
Based on analysis utilizing <> techniques,
Regression results: First run:
Classification results: template: prdline.my == “Unknown” -> 296 Low.cor.X.glm: Leaderboard: 0.83458 newobs_tbl=[N=471, Y=327]; submit_filename=template_Final_glm_submit.csv OOB_conf_mtrx=[YN=125, NY=76]=201; max.Accuracy.OOB=0.7710; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=95.42; productline=49.22; D.T.like=29.75; D.T.use=26.32; D.T.box=21.53;
prdline: -> Worse than template prdline.my == “Unknown” -> 285 All.X.no.rnorm.rf: Leaderboard: 0.82649 newobs_tbl=[N=485, Y=313]; submit_filename=prdline_Final_rf_submit.csv OOB_conf_mtrx=[YN=119, NY=80]=199; max.Accuracy.OOB=0.8339; opt.prob.threshold.OOB=0.5 startprice=100.00; biddable=84.25; D.sum.TfIdf=7.28; D.T.use=4.26; D.T.veri=2.78; D.T.scratch=1.99; D.T.box=; D.T.like=; Low.cor.X.glm: Leaderboard: 0.81234 newobs_tbl=[N=471, Y=327]; submit_filename=prdline_Low_cor_X_glm_submit.csv OOB_conf_mtrx=[YN=125, NY=74]=199; max.Accuracy.OOB=0.8205; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=96.07; prdline.my=51.37; D.T.like=29.39; D.T.use=25.43; D.T.box=22.27; D.T.veri=; D.T.scratch=;
oobssmpl: -> Low.cor.X.glm: Leaderboard: 0.83402 newobs_tbl=[N=440, Y=358]; submit_filename=oobsmpl_Final_glm_submit OOB_conf_mtrx=[YN=114, NY=84]=198; max.Accuracy.OOB=0.7780; opt.prob.threshold.OOB=0.5 startprice=100.00; biddable=93.87; prdline.my=60.48; D.sum.TfIdf=; D.T.condition=8.69; D.T.screen=7.96; D.T.use=7.50; D.T.veri=; D.T.scratch=;
category: -> Low.cor.X.glm: Leaderboard: 0.82381 newobs_tbl=[N=470, Y=328]; submit_filename=category_Final_glm_submit OOB_conf_mtrx=[YN=119, NY=57]=176; max.Accuracy.OOB=0.8011; opt.prob.threshold.OOB=0.6 startprice=100.00; biddable=79.19; prdline.my=55.22; D.sum.TfIdf=; D.T.ipad=27.05; D.T.like=21.44; D.T.box=20.67; D.T.condition=; D.T.screen=;
dataclns: -> All.X.no.rnorm.rf: Leaderboard: 0.82211 newobs_tbl=[N=485, Y=313]; submit_filename=dataclns_Final_rf_submit OOB_conf_mtrx=[YN=104, NY=75]=179; max.Accuracy.OOB=0.7977; opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=65.85; prdline.my=7.74; D.sum.TfIdf=; D.T.use=2.01; D.T.condition=1.87; D.T.veri=1.62; D.T.ipad=; D.T.like=; Low.cor.X.glm: Leaderboard: 0.79264 newobs_tbl=[N=460, Y=338]; submit_filename=dataclns_Low_cor_X_glm_submit OOB_conf_mtrx=[YN=113, NY=74]=187; max.Accuracy.OOB=0.7977; opt.prob.threshold.OOB=0.5 -> different from prev run of 0.6 biddable=100.00; startprice.log=91.85; prdline.my=38.34; D.sum.TfIdf=; D.T.ipad=29.92; D.T.box=27.76; D.T.work=25.79; D.T.use=; D.T.condition=;
txtterms: -> top_n = c(10) Low.cor.X.glm: Leaderboard: 0.81448 newobs_tbl=[N=442, Y=356]; submit_filename=txtterms_Final_glm_submit OOB_conf_mtrx=[YN=113, NY=69]=182; max.Accuracy.OOB=0.7943; opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=90.11; prdline.my=37.65; D.sum.TfIdf=; D.T.ipad=28.67; D.T.work=24.90; D.T.great=21.44; # [1] “D.T.condit” “D.T.condition” “D.T.good” “D.T.ipad” “D.T.new”
# [6] “D.T.scratch” “D.T.screen” “D.T.this” “D.T.use” “D.T.work”
All.X.glm: Leaderboard: 0.81016
newobs_tbl=[N=445, Y=353]; submit_filename=txtterms_Final_glm_submit
OOB_conf_mtrx=[YN=108, NY=72]=180; max.Accuracy.OOB=0.7966;
opt.prob.threshold.OOB=0.5
biddable=100.00; startprice.log=88.24; prdline.my=33.81; D.sum.TfIdf=;
D.T.scratch=25.51; D.T.use=18.97; D.T.good=16.37;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.great” “D.T.excel” “D.T.work” “D.T.ipad”
Max.cor.Y.rpart: Leaderboard: 0.79258
newobs_tbl=[N=439, Y=359]; submit_filename=txtterms_Final_rpart_submit
OOB_conf_mtrx=[YN=105, NY=76]=181; max.Accuracy.OOB=0.7954802;
opt.prob.threshold.OOB=0.5
startprice.log=100; biddable=; prdline.my=; D.sum.TfIdf=;
D.T.scratch=; D.T.use=; D.T.good=;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
All.X.no.rnorm.rf: Leaderboard: 0.80929
newobs_tbl=[N=545, Y=253]; submit_filename=txtterms_Final_rf_submit
OOB_conf_mtrx=[YN=108, NY=61]=169; max.Accuracy.OOB=0.8090395
opt.prob.threshold.OOB=0.5
startprice.log=100.00; biddable=78.82; idseq.my=63.43; prdline.my=45.57;
D.T.use=2.76; D.T.condit=2.35; D.T.scratch=2.00; D.T.good=;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
txtclstr: All.X.no.rnorm.rf: Leaderboard: 0.79363 -> 0.79573 newobs_tbl=[N=537, Y=261]; submit_filename=txtclstr_Final_rf_submit OOB_conf_mtrx=[YN=104, NY=61]=165; max.Accuracy.OOB=0.8135593 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=79.99; idseq.my=64.94; prdline.my=4.14; prdline.my.clusterid=1.15; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
dupobs: All.X.no.rnorm.rf: Leaderboard: 0.79295 newobs_tbl=[N=541, Y=257]; submit_filename=dupobs_Final_rf_submit OOB_conf_mtrx=[YN=114, NY=65]=179; max.Accuracy.OOB=0.7977401 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=94.49; idseq.my=67.40; prdline.my=4.48; prdline.my.clusterid=1.99; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
All.X.no.rnorm.rf: Leaderboard: 0.79652
newobs_tbl=[N=523, Y=275]; submit_filename=dupobs_Final_rf_submit
OOB_conf_mtrx=[YN=114, NY=65]=179; max.Accuracy.OOB=0.7977401
opt.prob.threshold.OOB=0.5
startprice.log=100.00; biddable=94.24; idseq.my=67.92;
prdline.my=4.33; prdline.my.clusterid=2.17;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
csmmdl: All.X.no.rnorm.rf: Leaderboard: 0.79396 newobs_tbl=[N=525, Y=273]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=111, NY=66]=177; max.Accuracy.OOB=0.8000000 opt.prob.threshold.OOB=0.5 startprice.log=100.00; biddable=90.30; idseq.my=67.06; prdline.my=4.40; cellular.fctr=3.57; prdline.my.clusterid=2.08;
All.Interact.X.no.rnorm.rf: Leaderboard: 0.77867 newobs_tbl=[N=564, Y=234]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=120, NY=53]=173; max.Accuracy.OOB=0.8045198 opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=93.99; idseq.my=57.30; prdline.my=9.09; cellular.fctr=3.30; prdline.my.clusterid=2.35;
All.Interact.X.no.rnorm.rf: Leaderboard: 0.77152 newobs_tbl=[N=539, Y=259]; submit_filename=csmmdl_Final_rf_submit OOB_conf_mtrx=[YN=, NY=]=; max.Accuracy.OOB=0.8011299 opt.prob.threshold.OOB=0.5 biddable=100.00; startprice.log=94.93; idseq.my=57.12; prdline.my=9.29; cellular.fctr=3.20; prdline.my.clusterid=2.50; [1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
All.X.glmnet:
fit_RMSE=???; OOB_RMSE=115.1247; new_RMSE=115.1247;
prdline.my.fctr=100.00; condition.fctrNew=88.53; D.npnct09.log=84.34
biddable=16.48; idseq.my=57.27;
spdiff:
All.Interact.X.no.rnorm.rf: Leaderboard: 0.78218 newobs_tbl=[N=517, Y=281]; submit_filename=spdiff_Final_rf_submit OOB_conf_mtrx=[YN=121, NY=38]=159; max.Accuracy.OOB=0.8203390 opt.prob.threshold.OOB=0.6 biddable=100.00; startprice.diff=57.53; idseq.my=41.31; prdline.my=11.43; cellular.fctr=2.36; prdline.my.clusterid=1.82;
All.X.no.rnorm.rf:
fit_RMSE=92.19; OOB_RMSE=130.86; new_RMSE=130.86;
biddable=100.00; prdline.my.fctr=61.92; idseq.my=57.77;
condition.fctr=29.53; storage.fctr=11.22; color.fctr=6.69;
cellular.fctr=6.11
All.X.no.rnorm.rf: Leaderboard: 0.77443
newobs_tbl=[N=606, Y=192]; submit_filename=spdiff_Final_rf_submit
OOB_conf_mtrx=[YN=112, NY=28]=140; max.Accuracy.OOB=0.8418079
opt.prob.threshold.OOB=0.6
startprice.diff=100.00; biddable=96.53; idseq.my=38.10;
prdline.my=3.65; cellular.fctr=2.21; prdline.my.clusterid=0.91;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
color: All.Interact.X.glmnet: fit_RMSE=88.64520; prdline.my.fctr:D.TfIdf.sum.stem.stop.Ratio=100.00; prdline.my.fctr:condition.fctr=77.35 D.TfIdf.sum.stem.stop.Ratio=68.18 prdline.my.fctr:color.fctr=68.12 prdline.my.fctr:storage.fctr=63.32
All.X.no.rnorm.rf: Leaderboard: 0.80638
newobs_tbl=[N=550, Y=248]; submit_filename=color_Final_rf_submit
OOB_conf_mtrx=[YN=108, NY=54]=162; max.Accuracy.OOB=0.8169492
opt.prob.threshold.OOB=0.5
biddable=100.00; startprice.diff=77.90; idseq.my=48.49;
D.ratio.sum.TfIdf.nwrds=6.48; storage.fctr=4.74;
D.TfIdf.sum.stem.stop.Ratio=4.57; prdline.my=4.32;
[1] “D.T.condit” “D.T.use” “D.T.scratch” “D.T.new” “D.T.good” “D.T.screen” [7] “D.T.ipad” “D.T.great” “D.T.work” “D.T.excel”
assctxt: select_terms: [1] “condit” “use” “scratch” “new” “good” “ipad” “screen” “great”
[9] “work” “excel” “like” “box” “function” “item” “fulli” “minor” [17] “cosmet” “crack” “mint” “wear”
assoc_terms: [1] “bare” “sign” “light” “back” “hous” “tab” “dent”
[8] “brand” “open” “mini” “appl” “air” “wifi” “affect”
[15] “protector” “shape” “perfect” “order” “button” “origin” “retail”
[22] “seal” “includ” “100” “may” “show” “overal” “bodi”
[29] “phone” “will” “damag” “near” “top” “normal” “tear”
[36] “expect” “minim”
glb_allobs_df$prdline.my$.clusterid Entropy: 0.6665 (97.3037 pct) All.Interact.X.glmnet: fit_RMSE=88.40723; prdline.my.fctr:D.TfIdf.sum.stem.stop.Ratio=100.00; prdline.my.fctriPadAir:D.npnct01.log=79.67748; D.TfIdf.sum.stem.stop.Ratio=79.08192; prdline.my.fctriPadmini 2+:condition.fctrNew other (see details)=78.24020; prdline.my.fctriPad 3+:color.fctrSpace Gray=77.05886; prdline.my.fctriPadmini 2+:storage.fctrUnknown=75.68145; prdline.my.fctrUnknown:.clusterid.fctr3=74.23727;
All.Interact.X.no.rnorm.rf: Leaderboard: 0.72974
newobs_tbl=[N=682, Y=116]; submit_filename=assctxt_Final_rf_submit
OOB_conf_mtrx=[YN=125, NY=43]=168; max.Accuracy.OOB=0.8101695; max.auc.OOB=???;
opt.prob.threshold.OOB=0.6
biddable=100.00; startprice.diff=51.04; idseq.my=29.51;
startprice.diff:biddable=28.70
prdline.my.fctriPadmini:idseq.my=6.89
Highest max.auc.OOB=???; for model:
ctgry2: select_terms: 50 assoc_terms: 103 glb_allobs_df$prdline.my$.clusterid Entropy: 0.6559 (96.7556 pct) All.Interact.X.glmnet: next: All.X.glmnet fit_RMSE=88.80010; prdl.my.descr.fctr:storage.fctr 100.00 prdl.my.descr.fctr:condition.fctr 93.96 prdl.my.descr.fctr:D.npnct01.log 89.94 D.TfIdf.sum.stem.stop.Ratio 75.90 prdl.my.descr.fctr:color.fctr 72.43 prdl.my.descr.fctr:.clusterid.fctr7 63.97 prdl.my.descr.fctr:D.npnct08.log 63.46 prdl.my.descr.fctr 63.05 prdl.my.descr.fctr:D.TfIdf.sum.stem.stop.Ratio 62.91 prdl.my.descr.fctr:D.npnct16.log 62.39
Ensemble.glmnet: Leaderboard: 0.80480
newobs_tbl=[N=473, Y=325]; submit_filename=ctgry2_Final_glmnet_submit
OOB_conf_mtrx=[YN=79, NY=101]=180;
max.Accuracy.OOB=0.7977528; max.auc.OOB=0.8554068; opt.prob.threshold.OOB=0.4
Highest max.auc.OOB=0.8587215; for model:All.X.no.rnorm.rf
biddable 100.000
startprice.diff 71.793
idseq.my 43.511
ensemble: select_terms: 50 assoc_terms: 103 glb_allobs_df$prdline.my$.clusterid Entropy: 0.6570 (96.9282 pct) Final.glmnet: min.RMSE.fit=31.45801 Ensemble.glmnet: min.RMSE.fit=30.67172 startprice.predict.All.Interact.X.no.rnorm.rf 100.000 startprice.predict.All.X.no.rnorm.rf 75.381 All.X.glmnet: min.RMSE.fit=88.98066 prdl.my.descr.fctr 100.00 D.TfIdf.sum.stem.stop.Ratio 92.16 condition.fctr 79.01 prdl.my.descr.fctr:.clusterid.fctr5 69.91 D.npnct16.log 61.70 color.fctrWhite 59.42 D.npnct01.log 55.07 cellular.fctr1 53.35 D.terms.n.post.stop 52.92
Ensemble.glmnet: Leaderboard: 0.73183
newobs_tbl=[N=557, Y=241]; submit_filename=ensemble_Final_glmnet_submit
OOB_conf_mtrx=[YN=75, NY=60]=135;
max.Accuracy.OOB=0.8483146; max.auc.OOB=0.9187365; opt.prob.threshold.OOB=0.5
sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.873608
Highest max.auc.OOB=0.9180131; for model:All.X.no.rnorm.rf
startprice.diff 100.000 biddable 95.318 idseq.my 33.365
Use plot.ly for interactive plots ?
varImp for randomForest crashes in caret version:6.0.41 -> submit bug report
extensions toward multiclass classification are scheduled for the next release
glm_dmy_mdl should use the same method as glm_sel_mdl until a custom dummy classifier is implemented
# Session setup: clean workspace, fixed RNG seed, strings kept as character.
rm(list=ls())
set.seed(12345)
options(stringsAsFactors=FALSE)

# Personal helper scripts, sourced in this exact order.
# (Sourcing mydsutils.R reports "Loading required package" for caret, lattice
#  and ggplot2.)
invisible(lapply(
    c("~/Dropbox/datascience/R/myscript.R",
      "~/Dropbox/datascience/R/mydsutils.R",
      "~/Dropbox/datascience/R/myplot.R",
      "~/Dropbox/datascience/R/mypetrinet.R",
      "~/Dropbox/datascience/R/myplclust.R"),
    source))

# Gather all package requirements here
suppressPackageStartupMessages(require(doMC))
# Worker count; rule of thumb: max(length(glb_txt_vars), glb_n_cv_folds) + 1
registerDoMC(4)
#packageVersion("tm")
#require(sos); findFn("cosine", maxPages=2, sortby="MaxScore")
# Analysis control global variables

# Download locations of the training and the new (to-be-predicted) observations
glb_trnng_url <- "https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTrain.csv"
glb_newdt_url <- "https://inclass.kaggle.com/c/15-071x-the-analytics-edge-summer-2015/download/eBayiPadTest.csv"

# Run label (presumably the prefix of generated file names -- confirm in the output code)
glb_out_pfx <- "ensemble_"
glb_save_envir <- FALSE                     # or TRUE

# How the new observations are obtained / split off
glb_is_separate_newobs_dataset <- TRUE      # or FALSE
glb_split_entity_newobs_datasets <- TRUE    # or FALSE
glb_split_newdata_method <- "sample"        # "condition" or "sample" or "copy"
glb_split_newdata_condition <- NULL         # or "is.na(<var>)"; "<var> <condition_operator> <value>"
glb_split_newdata_size_ratio <- 0.3         # > 0 & < 1
glb_split_sample.seed <- 123                # or any integer
glb_max_fitobs <- NULL                      # or any integer

# Problem type: classification is defined as "not regression"
glb_is_regression <- FALSE
glb_is_classification <- !glb_is_regression
glb_is_binomial <- TRUE                     # or FALSE

# Response variable: raw column and the modelling column
glb_rsp_var_raw <- "sold"
# for classification, the response variable has to be a factor
glb_rsp_var <- "sold.fctr"                  # or glb_rsp_var_raw
# if the response factor is based on numbers/logicals e.g (0/1 OR TRUE/FALSE vs. "A"/"B"),
# or contains spaces (e.g. "Not in Labor Force")
# caret predict(..., type="prob") crashes
glb_map_rsp_raw_to_var <- function(raw) {
    # Map the raw response to the modelling factor.
    #
    # Args:
    #   raw: vector of raw response values; 1 maps to "Y", any other non-NA
    #        value maps to "N", NA stays NA.
    # Returns:
    #   A factor of the same length with levels c("N", "Y") ("N" is the
    #   reference level).
    #
    # Regression alternative:
    #     return(log(raw))
    mapped <- rep_len(NA_character_, length(raw))
    known <- !is.na(raw)
    mapped[known] <- ifelse(raw[known] == 1, "Y", "N")
    # Declare both levels explicitly: relevel(as.factor(x), ref="N") stops with
    # "'ref' must be an existing level" when no element maps to "N" (input has
    # no 0s, is all NA, or is empty), and the level codes must not depend on
    # which values happen to be present.
    return(factor(mapped, levels=c("N", "Y")))
    # Other alternatives:
    #     #as.factor(paste0("B", raw))
    #     #as.factor(gsub(" ", "\\.", raw))
}
glb_map_rsp_raw_to_var(c(1, 1, 0, 0, NA))
## [1] Y Y N N <NA>
## Levels: N Y
glb_map_rsp_var_to_raw <- function(var) {
    # Convert the response factor back to raw numbers: the level code of each
    # element minus one (first level -> 0, second level -> 1, NA -> NA).
    #
    # Alternatives for other response encodings:
    #     return(exp(var))
    #     #as.numeric(var)
    #     #gsub("\\.", " ", levels(var)[as.numeric(var)])
    #     c("<=50K", " >50K")[as.numeric(var)]
    #     #c(FALSE, TRUE)[as.numeric(var)]
    level_code <- as.numeric(var)
    return(level_code - 1)
}
# Round-trip check: raw -> factor -> raw should reproduce the input
glb_map_rsp_var_to_raw(glb_map_rsp_raw_to_var(c(1, 1, 0, 0, NA)))
## [1] 1 1 0 0 NA
# A response column that differs from the raw column needs a mapping function.
# Use the scalar, short-circuiting && in the if() condition (the elementwise &
# always evaluates both operands and can yield a vector condition).
if ((glb_rsp_var != glb_rsp_var_raw) && is.null(glb_map_rsp_raw_to_var))
    stop("glb_map_rsp_raw_to_var function expected")
# Prefix of the prediction column names
glb_rsp_var_out <- paste0(glb_rsp_var, ".predict.") # model_id is appended later
# List info gathered for various columns
# <col_name>: <description>; <notes>
# description = The text description of the product provided by the seller.
# biddable = Whether this is an auction (biddable=1) or a sale with a fixed price (biddable=0).
# startprice = The start price (in US Dollars) for the auction (if biddable=1) or the sale price (if biddable=0).
# condition = The condition of the product (new, used, etc.)
# cellular = Whether the iPad has cellular connectivity (cellular=1) or not (cellular=0).
# carrier = The cellular carrier for which the iPad is equipped (if cellular=1); listed as "None" if cellular=0.
# color = The color of the iPad.
# storage = The iPad's storage capacity (in gigabytes).
# productline = The name of the product being sold.
# Identifier column.
#   If multiple vars are parts of id, consider concatenating them to create one id var
#   If glb_id_var == NULL, ".rownames <- row.names()" is the default
#   Derive a numeric feature from id var
glb_id_var <- "UniqueID"
glb_category_var <- "prdline.my"

# Columns to drop / map / re-assign (none configured)
glb_drop_vars <- NULL           # or c("<col_name>")
glb_map_vars <- NULL            # or c("<var1>", "<var2>")
glb_map_urls <- list()
#     glb_map_urls[["<var1>"]] <- "<var1.url>"
glb_assign_pairs_lst <- NULL
#     glb_assign_pairs_lst[["<var1>"]] <- list(from=c(NA),
#                                              to=c("NA.my"))
glb_assign_vars <- names(glb_assign_pairs_lst)
# Derived features
# Each entry is keyed by the name of the derived column and holds
#   mapfn: function computing the derived values from its arguments
#   args : names of the columns handed to mapfn
glb_derive_lst <- NULL;
# Add logs of numerics that are not distributed normally -> do automatically ???

# Numeric sequence derived from the id column
glb_derive_lst[["idseq.my"]] <- list(
    mapfn=function(UniqueID) { return(UniqueID - 10000) }
    , args=c("UniqueID"))

# Plain copy of productline under a new name
glb_derive_lst[["prdline.my"]] <- list(
    mapfn=function(productline) { return(productline) }
    , args=c("productline"))

# Natural log of the start price
glb_derive_lst[["startprice.log"]] <- list(
    mapfn=function(startprice) { return(log(startprice)) }
    , args=c("startprice"))
# glb_derive_lst[["startprice.log.zval"]] <- list(

# Cleaned-up copy of the seller's description text: a fixed sequence of regex
# substitutions that split fused words, fix misspellings and normalise word
# variants. The substitutions run in the order listed; later patterns see the
# output of earlier ones (e.g. " re- assemble " relies on the separator rule
# having inserted the blank after "-").
glb_derive_lst[["descr.my"]] <- list(
    mapfn=function(description) { mod_raw <- description;
        # Modifications for this exercise only
        # Add dictionary to stemDocument e.g. stickers stemmed to sticker ???

        # Generic punctuation fixes (case-sensitive)
        mod_raw <- gsub("\\.\\.", "\\. ", mod_raw);
        mod_raw <- gsub("(\\w)(\\*|,|-|/)(\\w)", "\\1\\2 \\3", mod_raw);

        # Specific fixes, roughly alphabetical by the word being repaired
        mod_raw <- gsub("8\\.25", "825", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 10\\.SCREEN ", " 10\\. SCREEN ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 128 gb ", " 128gb ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" 16G, ", " 16GB, ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" actuuly ", " actual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Apple care ", " Applecare ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" ans ", " and ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" bacK!wiped ", " bacK ! wiped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" backplate", " back plate", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\bbarley", "barely", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" bend ", " bent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("Best Buy", "BestBuy", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" black\\.Device ", " black \\. Device ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub("black\\),charger ", "black\\), charger ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" blocks", " blocked", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" brokenCharger ", " broken Charger ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" carefully ", " careful ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" (conditon|condtion|contidion|conditions)", " condition", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub("(CONDITION|ONLY)\\.(\\w)", "\\1\\. \\2", mod_raw,
                        ignore.case=TRUE);
        # Case-sensitive on purpose: only the fused "conditionHas" form
        mod_raw <- gsub("(condition)(Has)", "\\1\\. \\2", mod_raw);
        mod_raw <- gsub(" consist ", " consistent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" cracksNo ", " cracks No ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" DEFAULTING ", " DEFAULT ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" definitely ", " definite ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" described", " describe", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" desciption", " description", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" devices", " device", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Digi\\.", " Digitizer\\.", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" display\\.New ", " display\\. New ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" displays", " display", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" drop ", " dropped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" effect ", " affect ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Excellant ", " Excellent ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" excellently", " excellent", mod_raw, ignore.case=TRUE);
        # The pattern consumes the blank after "EUC", so the replacement must
        # put it back; otherwise the expansion fuses with the next word
        # ("... conditionwith ...").
        mod_raw <- gsub(" EUC ", " excellent used condition ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" feels ", " feel ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" fineiCloud ", " fine iCloud ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" functioanlity", " functionality", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^Gentle ", "Gently ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\(gray color", "\\(spacegray color", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" GREAT\\.SCreen ", " GREAT\\. SCreen ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" Framing ", " Frame ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("iCL0UD", "iCLOUD", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^iPad Black 3rd generation ", "iPad 3 Black ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" IMEINo ", " IMEI No ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" install\\. ", " installed\\. ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("inivisible", "invisible", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" manuals ", " manual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" book ", " manual ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" mars ", " marks ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" marks\\.Absolutely ", " marks\\. Absolutely ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" minimum", " minimal", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" MINT\\.wiped ", " MINT\\. wiped ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" NEW\\!(SCREEN|ONE) ", " NEW\\! \\1 ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" new looking$", " looks new", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" newer ", " new ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" oped ", " opened ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" opening", " opened", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" operated", " operational", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" perfectlycord ", " perfectly cord ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" performance", " performs", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" personalized ", " personal ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" products ", " product ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Keeped ", " Kept ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" knicks ", " nicks ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^READiPad ", "READ iPad ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" re- assemble ", " reassemble ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" REFURB\\.", " REFURBISHED\\.", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" reponding", " respond", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" rotation ", " rotate ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Sales ", " Sale ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" scratchs ", " scratches ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" SCREEB ", " SCREEN ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" shipped| Shipment", " ship", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("shrink wrap", "shrinkwrap", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" sides ", " side ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" skinned,", " skin,", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("\\bspace (grey|gray)", "spacegray", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" spec ", " speck ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("^somescratches ", "some scratches ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Sticker ", " Stickers ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub("SWAPPA\\.COM", "SWAPPACOM", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" T- Mobile", " TMobile", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" touchscreen ", " touch screen ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" UnlockedCracked ", " Unlocked Cracked ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" uppser ", " upper ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" use\\.Scratches ", " use\\. Scratches ", mod_raw,
                        ignore.case=TRUE);
        mod_raw <- gsub(" verify ", " verified ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" wear\\.Device ", " wear\\. Device ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" whats ", " what's ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" WiFi\\+4G ", " WiFi \\+ 4G ", mod_raw, ignore.case=TRUE);
        mod_raw <- gsub(" Zaag Invisible Shield", " Zaag InvisibleShield", mod_raw,
                        ignore.case=TRUE);
        return(mod_raw) }
    , args=c("description"))
# mapfn=function(startprice) { return(scale(log(startprice))) }
# , args=c("startprice"))
# mapfn=function(Rasmussen) { return(ifelse(sign(Rasmussen) >= 0, 1, 0)) }
# mapfn=function(PropR) { return(as.factor(ifelse(PropR >= 0.5, "Y", "N"))) }
# mapfn=function(purpose) { return(relevel(as.factor(purpose), ref="all_other")) }
# mapfn=function(Week) { return(substr(Week, 1, 10)) }
# mapfn=function(raw) { tfr_raw <- as.character(cut(raw, 5));
# tfr_raw[is.na(tfr_raw)] <- "NA.my";
# return(as.factor(tfr_raw)) }
# , args=c("raw"))
# mapfn=function(PTS, oppPTS) { return(PTS - oppPTS) }
# , args=c("PTS", "oppPTS"))
# # If glb_allobs_df is not sorted in the desired manner
# mapfn=function(Week) { return(coredata(lag(zoo(orderBy(~Week, glb_allobs_df)$ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI) { return(coredata(lag(zoo(ILI), -2, na.pad=TRUE))) }
# mapfn=function(ILI.2.lag) { return(log(ILI.2.lag)) }
# glb_derive_lst[["<txt_var>.niso8859.log"]] <- list(
# mapfn=function(<txt_var>) { match_lst <- gregexpr("&#[[:digit:]]{3};", <txt_var>)
# match_num_vctr <- unlist(lapply(match_lst,
# function(elem) length(elem)))
# return(log(1 + match_num_vctr)) }
# , args=c("<txt_var>"))
# mapfn=function(raw) { mod_raw <- raw;
# mod_raw <- gsub("&#[[:digit:]]{3};", " ", mod_raw);
# # Modifications for this exercise only
# mod_raw <- gsub("\\bgoodIn ", "good In", mod_raw);
# return(mod_raw)
# # Create user-specified pattern vectors
# #sum(mycount_pattern_occ("Metropolitan Diary:", glb_allobs_df$Abstract) > 0)
# if (txt_var %in% c("Snippet", "Abstract")) {
# txt_X_df[, paste0(txt_var_pfx, ".P.metropolitan.diary.colon")] <-
# as.integer(0 + mycount_pattern_occ("Metropolitan Diary:",
# glb_allobs_df[, txt_var]))
#summary(glb_allobs_df[ ,grep("P.on.this.day", names(glb_allobs_df), value=TRUE)])
# glb_derive_lst[["<var1>"]] <- glb_derive_lst[["<var2>"]]
# Names of all derived columns
glb_derive_vars <- names(glb_derive_lst)
# Ad-hoc check of a single derivation (kept for interactive use):
# tst <- "descr.my"; args_lst <- NULL; for (arg in glb_derive_lst[[tst]]$args) args_lst[[arg]] <- glb_allobs_df[, arg]; print(head(args_lst[[arg]])); print(head(drv_vals <- do.call(glb_derive_lst[[tst]]$mapfn, args_lst)));
# print(which_ix <- which(args_lst[[arg]] == 0.75)); print(drv_vals[which_ix]);

# Date columns (none configured)
glb_date_vars <- NULL       # or c("<date_var>")
glb_date_fmts <- list()     # glb_date_fmts[["<date_var>"]] <- "%m/%e/%y"
glb_date_tzs <- list()      # glb_date_tzs[["<date_var>"]] <- "America/New_York"
#grep("America/New", OlsonNames(), value=TRUE)

# Text columns
glb_txt_vars <- "descr.my"
Sys.setlocale("LC_ALL", "C") # For english
## [1] "C/C/C/C/C/en_US.UTF-8"
glb_txt_munge_filenames_pfx <- "ebay_mytxt_"
glb_append_stop_words <- list()
# Remember to use unstemmed words
#orderBy(~ -cor.y.abs, subset(glb_feats_df, grepl("[HSA]\\.T\\.", id) & !is.na(cor.high.X)))
# Additional stop words for the "descr.my" text variable (presumably appended to
# the standard stop-word list by the text-processing code -- not visible here).
# NOTE: while every candidate below stays commented out, the right-hand side is
# c(NULL), i.e. NULL, and assigning NULL with [[<- adds no "descr.my" element
# to glb_append_stop_words. Uncomment terms (keeping the leading commas) to
# activate them.
glb_append_stop_words[["descr.my"]] <- c(NULL
# freq = 1
# ,"511","825","975"
# ,"2nd"
# ,"a1314","a1430","a1432"
# ,"abused","across","adaptor","add","advised","antenna","anti","anyone","anything"
# ,"applied","applying","area","arizona","att","attached"
# ,"backlight","backlit","beetle","beginning","besides","bidder","binder"
# ,"bonus","boot","bound","brick","broke","bruises","buyers"
# ,"capacity","causing","changed","changing","cherished","chrome","classes","closely"
# ,"confidence","considerable","consumer","contents","control","cream","cuts"
# ,"daily","date","daughter"
# ,"deactivated","decent","deep","defender","defense","degree"
# ,"demonstration","depicted","depress"
# ,"difficulty","disclaimer","discoloration","distressed","divider"
# ,"dlxnqat9g5wt","dock","documents","done","dont","durable","dust","duty"
# ,"either","emblem","erased","ereader","esi","essentially","etch","etched"
# ,"every","exact","exhibition","expires"
# ,"facing","faded","faint","february","film","final","five"
# ,"flickers","folding","forgot","forwarders","freezes","freight"
# ,"games","generic","genuine","glitter","goes","grey","guide"
# ,"half","hairline","handstand","hdmi","high","higher","hold","hole","hospital"
# ,"imie","immaculate","impact"
# ,"instead","intended"
# ,"interest","interior","international","internationally","intro"
# ,"jack","july"
# ,"keeps","keyword","kids","kind","known"
# ,"largest","last","late","length","let","letters","level"
# ,"lifting","limited","line","lining","liquid","liquidation","literally","literature"
# ,"local","logic","long","longer","looping","loose","loss","lost"
# ,"mb292ll","mc707ll","mc916ll","mc991ll","md789ll","mf432ll","mgye2ll"
# ,"mic","middle", "mind","mixed","mostly"
# ,"neither","none","november"
# ,"occasional","oem","often","online","outside"
# ,"padfolio","pairing","paperwork","past"
# ,"period","pet","photograph","piece","played","plug"
# ,"poor","portfolio","portion","pouch"
# ,"preinstalled","pressure","price","proof","provided"
# ,"ranging","rather"
# ,"real","realized","reassemble","reboot","receipt","recently","red"
# ,"reflected","refunds","remote","repeat"
# ,"required","reserve","residue","restarts","result","reviewed"
# ,"ringer","roughly","rubber","running"
# ,"said","school"
# ,"seamlessly","seconds","seem","seen","semi","send","september","serious","setup"
# ,"shell","short","showroom"
# ,"sighs","site","size","sleeve","slice","smoke","smooth","smudge"
# ,"softer","software","somewhat","soon"
# ,"space","sparingly","sparkiling","special","speed","speigen"
# ,"stains","standup","start","status","stopped","strictly"
# ,"subtle","sustained","swappacom","swiped","swivel"
# ,"take","technical","tempered","texture","thank","therefore","think","though"
# ,"toddler","totally","touchy","toys","tried","typical"
# ,"university","unknown","untouched","upgrade"
# ,"valid","vary","version","virtually"
# ,"want","wavy","website","whole","winning","worn","wrapped"
# ,"zaag","zero", "zombie","zoogue"
)
#subset(glb_allobs_df, S.T.newyorktim > 0)[, c("UniqueID", "Snippet", "S.T.newyorktim")]
#glb_txt_lst[["Snippet"]][which(glb_allobs_df$UniqueID %in% c(8394, 8317, 8339, 8350, 8307))]
# Terms to keep regardless of filtering (none configured)
# Remember to use stemmed terms
glb_important_terms <- list()

# Term selection for the text variables
glb_txt_cor_var <- glb_rsp_var          # or "feat"
glb_txt_filter_terms <- "top.val"       # select one from c("top.cor", "top.val", "sparse")

# One entry per text variable, named after it
glb_txt_top_n <- setNames(50, glb_txt_vars)
# Sparsity threshold per text variable
#   0.950 generates 8 terms
# Properties:
#   numrows(glb_feats_df) << numrows(glb_fitobs_df)
#   Select terms that appear in at least 0.2 * O(FP/FN(glb_OOBobs_df))
#   numrows(glb_OOBobs_df) = 1.1 * numrows(glb_newobs_df)
glb_sprs_thresholds <- setNames(0.950, glb_txt_vars)
# Columns that must not be used as model features

# User-specified exclusions
glb_exclude_vars_as_features <- c(
    "productline", "description", "startprice",
    #"startprice.log", "sold",
    "prdline.my", "prdline.my.fctr"
)
# The raw response is excluded whenever a separate response column is modelled
if (glb_rsp_var_raw != glb_rsp_var) {
    glb_exclude_vars_as_features <-
        union(glb_exclude_vars_as_features, glb_rsp_var_raw)
}
# List feats that shd be excluded due to known causation by prediction variable
glb_exclude_vars_as_features <-
    union(glb_exclude_vars_as_features, NULL)   # or c("<col_name>")

# Missing-value imputation
glb_impute_na_data <- FALSE             # or TRUE
glb_mice_complete.seed <- 144           # or any integer

# Clustering
glb_cluster <- TRUE
glb_cluster.seed <- 189                 # or any integer
glb_cluster_entropy_var <- glb_rsp_var  # or "<feat>"

glb_interaction_only_features <- NULL   # or ???
# Empty containers for the fitted models and their summary table
glb_models_lst <- list()
glb_models_df <- data.frame()

# Candidate modelling methods, chosen by problem type
glb_models_method_vctr <-
    if (glb_is_regression) {
        # Regression
        c("lm", "glm", "bayesglm", "glmnet", "rpart", "rf")
    } else if (glb_is_binomial) {
        # Two-class classification
        c("glm", "bayesglm", "glmnet", "rpart", "rf")
    } else {
        # Any other classification
        c("rpart", "rf")
    }
# Baseline prediction model feature(s)
glb_Baseline_mdl_var <- NULL            # or c("<col_name>")

# Custom model metric (all unset here)
glb_model_metric_terms <- NULL
#   or matrix(c(
#       0,1,2,3,4,
#       2,0,1,2,3,
#       4,2,0,1,2,
#       6,4,2,0,1,
#       8,6,4,2,0
#   ), byrow=TRUE, nrow=5)
glb_model_metric <- NULL                # or "<metric_name>"
glb_model_metric_maximize <- NULL       # or FALSE (TRUE is not the default for both classification & regression)
glb_model_metric_smmry <- NULL
#   or function(data, lev=NULL, model=NULL) {
#       confusion_mtrx <- t(as.matrix(confusionMatrix(data$pred, data$obs)))
#       #print(confusion_mtrx)
#       #print(confusion_mtrx * glb_model_metric_terms)
#       metric <- sum(confusion_mtrx * glb_model_metric_terms) / nrow(data)
#       names(metric) <- glb_model_metric
#       return(metric)
#   }

# Tuning grid: one row per parameter with its min / max / step (or NULL)
glb_tune_models_df <- rbind(
    #data.frame(parameter="cp", min=0.00005, max=0.00005, by=0.000005),
        #seq(from=0.01, to=0.01, by=0.01)
    #data.frame(parameter="mtry", min=080, max=100, by=10),
    #data.frame(parameter="mtry", min=08, max=10, by=1),
    data.frame(parameter="dummy", min=2, max=4, by=1)
)

glb_n_cv_folds <- 3                     # or NULL
glb_clf_proba_threshold <- NULL         # 0.5
# Criteria used to select among fitted models.
# The two tests below are independent (not an if/else): the regression setting
# is applied first and the classification setting, when active, overrides it.
if (glb_is_regression) {
    glb_model_evl_criteria <- c("min.RMSE.OOB", "max.R.sq.OOB", "max.Adj.R.sq.fit")
}
# Fit-based alternative for regression:
#glb_model_evl_criteria <- c("min.RMSE.fit", "max.R.sq.fit", "max.Adj.R.sq.fit")
if (glb_is_classification) {
    glb_model_evl_criteria <-
        if (glb_is_binomial) {
            c("max.Accuracy.OOB", "max.auc.OOB", "max.Kappa.OOB", "min.aic.fit")
        } else {
            c("max.Accuracy.OOB", "max.Kappa.OOB")
        }
}
glb_sel_mdl_id <- "Ensemble.glmnet" #select from c(NULL, "Ensemble.glmnet", "Low.cor.X.glm")
glb_fin_mdl_id <- NULL # select from c(NULL, glb_sel_mdl_id, "Final")
glb_dsp_cols <- c("sold", ".grpid", "color", "condition", "cellular", "carrier", "storage")
# Depict process
glb_analytics_pn <- petrinet(name="glb_analytics_pn",
trans_df=data.frame(id=1:6,
name=c("data.training.all","data.new",
"model.selected","model.final",
"data.training.all.prediction","data.new.prediction"),
x=c( -5,-5,-15,-25,-25,-35),
y=c( -5, 5, 0, 0, -5, 5)
),
places_df=data.frame(id=1:4,
name=c("bgn","fit.data.training.all","predict.data.new","end"),
x=c( -0, -20, -30, -40),
y=c( 0, 0, 0, 0),
M0=c( 3, 0, 0, 0)
),
arcs_df=data.frame(
begin=c("bgn","bgn","bgn",
"data.training.all","model.selected","fit.data.training.all",
"fit.data.training.all","model.final",
"data.new","predict.data.new",
"data.training.all.prediction","data.new.prediction"),
end =c("data.training.all","data.new","model.selected",
"fit.data.training.all","fit.data.training.all","model.final",
"data.training.all.prediction","predict.data.new",
"predict.data.new","data.new.prediction",
"end","end")
))
#print(ggplot.petrinet(glb_analytics_pn))
print(ggplot.petrinet(glb_analytics_pn) + coord_flip())
## Loading required package: grid
glb_analytics_avl_objs <- NULL
glb_chunks_df <- myadd_chunk(NULL, "import.data")
## label step_major step_minor bgn end elapsed
## 1 import.data 1 0 8.653 NA NA
1.0: import data
#glb_chunks_df <- myadd_chunk(NULL, "import.data")
glb_trnobs_df <- myimport_data(url=glb_trnng_url, comment="glb_trnobs_df",
force_header=TRUE)
## [1] "Reading file ./data/eBayiPadTrain.csv..."
## [1] "dimensions of data in ./data/eBayiPadTrain.csv: 1,861 rows x 11 cols"
## description
## 1 iPad is in 8.5+ out of 10 cosmetic condition!
## 2 Previously used, please read description. May show signs of use such as scratches to the screen and
## 3
## 4
## 5 Please feel free to buy. All products have been thoroughly inspected, cleaned and tested to be 100%
## 6
## biddable startprice condition cellular carrier color
## 1 0 159.99 Used 0 None Black
## 2 1 0.99 Used 1 Verizon Unknown
## 3 0 199.99 Used 0 None White
## 4 0 235.00 New other (see details) 0 None Unknown
## 5 0 199.99 Seller refurbished Unknown Unknown Unknown
## 6 1 175.00 Used 1 AT&T Space Gray
## storage productline sold UniqueID
## 1 16 iPad 2 0 10001
## 2 16 iPad 2 1 10002
## 3 16 iPad 4 1 10003
## 4 16 iPad mini 2 0 10004
## 5 Unknown Unknown 0 10005
## 6 32 iPad mini 2 1 10006
## description
## 65
## 283 Pristine condition, comes with a case and stylus.
## 948 \211\333\317Used Apple Ipad 16 gig 1st generation in Great working condition and 100% functional.Very little
## 1354
## 1366 Item still in complete working order, minor scratches, normal wear and tear but no damage. screen is
## 1840
## biddable startprice condition cellular carrier color
## 65 0 195.00 Used 0 None Unknown
## 283 1 20.00 Used 0 None Unknown
## 948 0 110.00 Seller refurbished 0 None Black
## 1354 0 300.00 Used 0 None White
## 1366 1 125.00 Used Unknown Unknown Unknown
## 1840 0 249.99 Used 1 Sprint Space Gray
## storage productline sold UniqueID
## 65 16 iPad mini 0 10065
## 283 64 iPad 1 0 10283
## 948 32 iPad 1 0 10948
## 1354 16 iPad Air 1 11354
## 1366 Unknown iPad 1 1 11366
## 1840 16 iPad Air 1 11840
## description
## 1856 Overall item is in good condition and is fully operational and ready to use. Comes with box and
## 1857 Used. Tested. Guaranteed to work. Physical condition grade B+ does have some light scratches and
## 1858 This item is brand new and was never used; however, the box and/or packaging has been opened.
## 1859
## 1860 This unit has minor scratches on case and several small scratches on the display. \nIt is in
## 1861 30 Day Warranty. Fully functional engraved iPad 1st Generation with signs of normal wear which
## biddable startprice condition cellular carrier
## 1856 0 89.50 Used 1 AT&T
## 1857 0 239.95 Used 0 None
## 1858 0 329.99 New other (see details) 0 None
## 1859 0 400.00 New 0 None
## 1860 0 89.00 Seller refurbished 0 None
## 1861 0 119.99 Used 1 AT&T
## color storage productline sold UniqueID
## 1856 Unknown 16 iPad 1 0 11856
## 1857 Black 32 iPad 4 1 11857
## 1858 Space Gray 16 iPad Air 0 11858
## 1859 Gold 16 iPad mini 3 0 11859
## 1860 Black 64 iPad 1 1 11860
## 1861 Black 64 iPad 1 0 11861
## 'data.frame': 1861 obs. of 11 variables:
## $ description: chr "iPad is in 8.5+ out of 10 cosmetic condition!" "Previously used, please read description. May show signs of use such as scratches to the screen and " "" "" ...
## $ biddable : int 0 1 0 0 0 1 1 0 1 1 ...
## $ startprice : num 159.99 0.99 199.99 235 199.99 ...
## $ condition : chr "Used" "Used" "Used" "New other (see details)" ...
## $ cellular : chr "0" "1" "0" "0" ...
## $ carrier : chr "None" "Verizon" "None" "None" ...
## $ color : chr "Black" "Unknown" "White" "Unknown" ...
## $ storage : chr "16" "16" "16" "16" ...
## $ productline: chr "iPad 2" "iPad 2" "iPad 4" "iPad mini 2" ...
## $ sold : int 0 1 1 0 0 1 1 0 1 1 ...
## $ UniqueID : int 10001 10002 10003 10004 10005 10006 10007 10008 10009 10010 ...
## - attr(*, "comment")= chr "glb_trnobs_df"
## NULL
# glb_trnobs_df <- read.delim("data/hygiene.txt", header=TRUE, fill=TRUE, sep="\t",
# fileEncoding='iso-8859-1')
# glb_trnobs_df <- read.table("data/hygiene.dat.labels", col.names=c("dirty"),
# na.strings="[none]")
# glb_trnobs_df$review <- readLines("data/hygiene.dat", n =-1)
# comment(glb_trnobs_df) <- "glb_trnobs_df"
# glb_trnobs_df <- data.frame()
# for (symbol in c("Boeing", "CocaCola", "GE", "IBM", "ProcterGamble")) {
# sym_trnobs_df <-
# myimport_data(url=gsub("IBM", symbol, glb_trnng_url), comment="glb_trnobs_df",
# force_header=TRUE)
# sym_trnobs_df$Symbol <- symbol
# glb_trnobs_df <- myrbind_df(glb_trnobs_df, sym_trnobs_df)
# }
# glb_trnobs_df <-
# glb_trnobs_df %>% dplyr::filter(Year >= 1999)
# Obtain the new (test) observations and build glb_allobs_df.
#   - glb_is_separate_newobs_dataset: import them from glb_newdt_url and stack
#     them under the training observations.
#   - otherwise: carve them out of the imported training data according to
#     glb_split_newdata_method ("condition", "sample" or "copy").
if (glb_is_separate_newobs_dataset) {
    glb_newobs_df <- myimport_data(url=glb_newdt_url, comment="glb_newobs_df",
                                   force_header=TRUE)

    # To make plots / stats / checks easier in chunk:inspectORexplore.data
    glb_allobs_df <- myrbind_df(glb_trnobs_df, glb_newobs_df);
    comment(glb_allobs_df) <- "glb_allobs_df"
} else {
    # All observations come from the single imported file
    glb_allobs_df <- glb_trnobs_df; comment(glb_allobs_df) <- "glb_allobs_df"

    if (!glb_split_entity_newobs_datasets) {
        # Not supported. A draft that sampled max(2, nrow(glb_trnobs_df) / 1000)
        # rows of glb_trnobs_df used to follow this stop(); it could never
        # execute, so it was removed and is recorded here only as a note.
        stop("Not implemented yet")
    } else if (glb_split_newdata_method == "condition") {
        # New obs = rows satisfying the condition; training obs = the rest.
        # The condition is an R expression held as text in
        # glb_split_newdata_condition.
        glb_newobs_df <- do.call("subset",
            list(glb_trnobs_df, parse(text=glb_split_newdata_condition)))
        glb_trnobs_df <- do.call("subset",
            list(glb_trnobs_df, parse(text=paste0("!(",
                                                  glb_split_newdata_condition,
                                                  ")"))))
    } else if (glb_split_newdata_method == "sample") {
        # Seeded random split on the raw response; rows flagged FALSE by
        # sample.split() become the new observations.
        require(caTools)

        set.seed(glb_split_sample.seed)
        split <- sample.split(glb_trnobs_df[, glb_rsp_var_raw],
                              SplitRatio=(1-glb_split_newdata_size_ratio))
        glb_newobs_df <- glb_trnobs_df[!split, ]
        glb_trnobs_df <- glb_trnobs_df[split ,]
    } else if (glb_split_newdata_method == "copy") {
        # Training and new observations are both the full data set
        glb_trnobs_df <- glb_allobs_df
        comment(glb_trnobs_df) <- "glb_trnobs_df"
        glb_newobs_df <- glb_allobs_df
        comment(glb_newobs_df) <- "glb_newobs_df"
    } else stop("glb_split_newdata_method should be %in% c('condition', 'sample', 'copy')")

    comment(glb_newobs_df) <- "glb_newobs_df"
    myprint_df(glb_newobs_df)
    str(glb_newobs_df)

    # The training set changed as well when it was split, so show it too
    if (glb_split_entity_newobs_datasets) {
        myprint_df(glb_trnobs_df)
        str(glb_trnobs_df)
    }
}
## [1] "Reading file ./data/eBayiPadTest.csv..."
## [1] "dimensions of data in ./data/eBayiPadTest.csv: 798 rows x 10 cols"
## description
## 1 like new
## 2 Item is in great shape. I upgraded to the iPad Air 2 and don't need the mini any longer, even though
## 3 This iPad is working and is tested 100%. It runs great. It is in good condition. Cracked digitizer.
## 4
## 5 Grade A condition means that the Ipad is 100% working condition. Cosmetically 8/9 out of 10 - Will
## 6 Brand new factory sealed iPad in an OPEN BOX...THE BOX ITSELF IS HEAVILY DISTRESSED(see
## biddable startprice condition cellular carrier color
## 1 0 105.00 Used 1 AT&T Unknown
## 2 0 195.00 Used 0 None Unknown
## 3 0 219.99 Used 0 None Unknown
## 4 1 100.00 Used 0 None Unknown
## 5 0 210.99 Manufacturer refurbished 0 None Black
## 6 0 514.95 New other (see details) 0 None Gold
## storage productline UniqueID
## 1 32 iPad 1 11862
## 2 16 iPad mini 2 11863
## 3 64 iPad 3 11864
## 4 16 iPad mini 11865
## 5 32 iPad 3 11866
## 6 64 iPad Air 2 11867
## description
## 1 like new
## 142 iPad mini 1st gen wi-fi 16gb is in perfect working order.
## 309 In excellent condition. Minor scratches on the back. Screen in mint condition. Comes in original
## 312 iPad is in Great condition, the screen is in great condition showing only a few minor scratches, the
## 320 Good condition and fully functional
## 369
## biddable startprice condition cellular carrier color storage
## 1 0 105.00 Used 1 AT&T Unknown 32
## 142 1 0.99 Used 0 None Unknown 16
## 309 0 200.00 Used 1 AT&T Black 32
## 312 1 0.99 Used 0 None Unknown 16
## 320 1 60.00 Used 0 None White 16
## 369 1 197.97 Used 0 None Unknown 64
## productline UniqueID
## 1 iPad 1 11862
## 142 iPad mini 12003
## 309 iPad 3 12170
## 312 iPad mini 2 12173
## 320 iPad 1 12181
## 369 iPad mini 3 12230
## description
## 793 Crack on digitizer near top. Top line of digitizer does not respond to touch. Other than that, all
## 794
## 795
## 796
## 797
## 798 Slightly Used. Includes everything you need plus a nice leather case!\nThere is a slice mark on the
## biddable startprice condition cellular carrier color
## 793 0 104.00 For parts or not working 1 Unknown Black
## 794 0 95.00 Used 1 AT&T Unknown
## 795 1 199.99 Manufacturer refurbished 0 None White
## 796 0 149.99 Used 0 None Unknown
## 797 0 7.99 New Unknown Unknown Unknown
## 798 0 139.00 Used 1 Unknown Black
## storage productline UniqueID
## 793 16 iPad 2 12654
## 794 64 iPad 1 12655
## 795 16 iPad 4 12656
## 796 16 iPad 2 12657
## 797 Unknown iPad 3 12658
## 798 32 Unknown 12659
## 'data.frame': 798 obs. of 10 variables:
## $ description: chr "like new" "Item is in great shape. I upgraded to the iPad Air 2 and don't need the mini any longer, even though " "This iPad is working and is tested 100%. It runs great. It is in good condition. Cracked digitizer." "" ...
## $ biddable : int 0 0 0 1 0 0 0 0 0 1 ...
## $ startprice : num 105 195 220 100 211 ...
## $ condition : chr "Used" "Used" "Used" "Used" ...
## $ cellular : chr "1" "0" "0" "0" ...
## $ carrier : chr "AT&T" "None" "None" "None" ...
## $ color : chr "Unknown" "Unknown" "Unknown" "Unknown" ...
## $ storage : chr "32" "16" "64" "16" ...
## $ productline: chr "iPad 1" "iPad mini 2" "iPad 3" "iPad mini" ...
## $ UniqueID : int 11862 11863 11864 11865 11866 11867 11868 11869 11870 11871 ...
## - attr(*, "comment")= chr "glb_newobs_df"
## NULL
# The raw response must be fully observed in the training data
num_nas <- sum(is.na(glb_trnobs_df[, glb_rsp_var_raw]))
if (num_nas > 0) {
    stop("glb_trnobs_df$", glb_rsp_var_raw, " contains NAs for ", num_nas, " obs")
}

# Warn when the training or new data has as many rows as the combined data
if (nrow(glb_allobs_df) == nrow(glb_trnobs_df)) {
    warning("glb_trnobs_df same as glb_allobs_df")
}
if (nrow(glb_allobs_df) == nrow(glb_newobs_df)) {
    warning("glb_newobs_df same as glb_allobs_df")
}

# Remove user-specified columns from all three data frames
if (length(glb_drop_vars) > 0) {
    warning("dropping vars: ", paste(glb_drop_vars, collapse=", "))
    glb_allobs_df <- glb_allobs_df[, setdiff(names(glb_allobs_df), glb_drop_vars)]
    glb_trnobs_df <- glb_trnobs_df[, setdiff(names(glb_trnobs_df), glb_drop_vars)]
    glb_newobs_df <- glb_newobs_df[, setdiff(names(glb_newobs_df), glb_drop_vars)]
}
#stop(here"); sav_allobs_df <- glb_allobs_df # glb_allobs_df <- sav_allobs_df
# Combine training & new observations into glb_allobs_df for easier manipulation.
# Each row is first tagged with its origin in .src ("Train" / "Test").
glb_trnobs_df$.src <- "Train"; glb_newobs_df$.src <- "Test";
# .src is bookkeeping, so keep it out of the feature set
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, ".src")
# NOTE(review): myrbind_df is a project helper; presumably it row-binds frames
# whose columns differ (the test data has no response column) - confirm.
glb_allobs_df <- myrbind_df(glb_trnobs_df, glb_newobs_df)
comment(glb_allobs_df) <- "glb_allobs_df"
# Check for duplicates in glb_id_var
if (length(glb_id_var) == 0) {
# No id column configured: fall back to row names of the combined data
warning("using .rownames as identifiers for observations")
glb_allobs_df$.rownames <- rownames(glb_allobs_df)
glb_trnobs_df$.rownames <- rownames(subset(glb_allobs_df, .src == "Train"))
glb_newobs_df$.rownames <- rownames(subset(glb_allobs_df, .src == "Test"))
glb_id_var <- ".rownames"
}
# Third positional argument FALSE is drop=FALSE: the selection stays a data
# frame, so duplicated() compares whole rows of the id column(s)
if (sum(duplicated(glb_allobs_df[, glb_id_var, FALSE])) > 0)
stop(glb_id_var, " duplicated in glb_allobs_df")
# The id is not a predictor either
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_id_var)
# Sort the combined data by id (orderBy is presumably doBy::orderBy - confirm)
glb_allobs_df <- orderBy(reformulate(glb_id_var), glb_allobs_df)
# Discard the separate train / new frames; glb_allobs_df (with .src) is the
# only copy from here on
glb_trnobs_df <- glb_newobs_df <- NULL
# For Tableau
write.csv(glb_allobs_df, "data/eBayiPadAll.csv", row.names=FALSE)
#stop(here")
glb_drop_obs <- c(
11234, #sold=0; 2 other dups(10306, 11503) are sold=1
11844, #sold=0; 3 other dups(11721, 11738, 11812) are sold=1
NULL)
glb_allobs_df <- glb_allobs_df[!glb_allobs_df[, glb_id_var] %in% glb_drop_obs, ]
# Make any data corrections here
glb_allobs_df[glb_allobs_df[, glb_id_var] == 10986, "cellular"] <- "1"
glb_allobs_df[glb_allobs_df[, glb_id_var] == 10986, "carrier"] <- "T-Mobile"
# Check for duplicates by all features
require(gdata)
## Loading required package: gdata
## gdata: read.xls support for 'XLS' (Excel 97-2004) files ENABLED.
##
## gdata: read.xls support for 'XLSX' (Excel 2007+) files ENABLED.
##
## Attaching package: 'gdata'
##
## The following object is masked from 'package:stats':
##
## nobs
##
## The following object is masked from 'package:utils':
##
## object.size
#print(names(glb_allobs_df))
dup_allobs_df <- glb_allobs_df[duplicated2(subset(glb_allobs_df,
select=-c(UniqueID, sold, .src))), ]
dup_allobs_df <- orderBy(~productline+description+startprice+biddable, dup_allobs_df)
print(sprintf("Found %d duplicates by all features:", nrow(dup_allobs_df)))
## [1] "Found 304 duplicates by all features:"
myprint_df(dup_allobs_df)
## description biddable startprice condition cellular
## 1711 1 0.99 For parts or not working Unknown
## 2608 1 0.99 For parts or not working Unknown
## 293 1 5.00 Used Unknown
## 478 1 5.00 Used Unknown
## 385 0 15.00 Used 0
## 390 0 15.00 Used 0
## carrier color storage productline sold UniqueID .src
## 1711 Unknown Unknown 16 Unknown 1 11711 Train
## 2608 Unknown Unknown 16 Unknown NA 12608 Test
## 293 Unknown White 16 Unknown 1 10293 Train
## 478 Unknown White 16 Unknown 1 10478 Train
## 385 None Black 16 Unknown 0 10385 Train
## 390 None Black 16 Unknown 0 10390 Train
## description biddable startprice condition cellular
## 1956 1 0.99 Used 0
## 828 1 249.97 Manufacturer refurbished 1
## 3 0 199.99 Used 0
## 1649 0 209.00 For parts or not working Unknown
## 2111 1 200.00 Used 0
## 172 0 269.00 Used 0
## carrier color storage productline sold UniqueID .src
## 1956 None Unknown 16 iPad 2 NA 11956 Test
## 828 Unknown Black 64 iPad 2 0 10828 Train
## 3 None White 16 iPad 4 1 10003 Train
## 1649 Unknown Unknown 16 iPad Air 0 11649 Train
## 2111 None Space Gray 64 iPad mini 2 NA 12111 Test
## 172 None Unknown 32 iPad mini 2 0 10172 Train
## description biddable startprice condition cellular carrier color
## 8 0 329.99 New 0 None White
## 660 0 329.99 New 0 None White
## 319 0 345.00 New 0 None Gold
## 1886 0 345.00 New 0 None Gold
## 1363 0 498.88 New 1 Verizon Gold
## 1394 0 498.88 New 1 Verizon Gold
## storage productline sold UniqueID .src
## 8 16 iPad mini 3 0 10008 Train
## 660 16 iPad mini 3 0 10660 Train
## 319 16 iPad mini 3 1 10319 Train
## 1886 16 iPad mini 3 NA 11886 Test
## 1363 16 iPad mini 3 0 11363 Train
## 1394 16 iPad mini 3 0 11394 Train
# print(dup_allobs_df[, c(glb_id_var, glb_rsp_var_raw,
# "description", "startprice", "biddable")])
# write.csv(dup_allobs_df[, c("UniqueID"), FALSE], "ebayipads_dups.csv", row.names=FALSE)
# Assign a group id to every set of feature-identical observations.
# Step 1: collapse all columns except sold, UniqueID and .src into a single
# "#"-separated key (allfeats); identical listings get identical keys.
dupobs_df <- tidyr::unite(dup_allobs_df, "allfeats", -c(sold, UniqueID, .src), sep="#")
# dupobs_df <- dplyr::group_by(dupobs_df, allfeats)
# dupobs_df <- dupobs_df[, "UniqueID", FALSE]
# dupobs_df <- ungroup(dupobs_df)
#
# dupobs_df$.rownames <- row.names(dupobs_df)
# Step 2: one row per distinct key; its row name (a character string) becomes
# the group id, so ids follow the order in which keys first appear
grpobs_df <- data.frame(allfeats=unique(dupobs_df[, "allfeats"]))
grpobs_df$.grpid <- row.names(grpobs_df)
# Step 3: attach .grpid to each duplicate observation. No `by` is given, so
# merge() joins on the only shared column, allfeats
dupobs_df <- merge(dupobs_df, grpobs_df)
# dupobs_tbl <- table(dupobs_df$.grpid)
# print(max(dupobs_tbl))
# print(dupobs_tbl[which.max(dupobs_tbl)])
# print(dupobs_df[dupobs_df$.grpid == names(dupobs_tbl[which.max(dupobs_tbl)]), ])
# print(dupobs_df[dupobs_df$.grpid == 106, ])
# for (grpid in c(9, 17, 31, 36, 53))
# print(dupobs_df[dupobs_df$.grpid == grpid, ])
# Cross-tabulate duplicate groups by response value: one row per .grpid and one
# count column per value of sold (NA gets its own column when present, because
# of useNA="ifany").
dupgrps_df <- as.data.frame(table(dupobs_df$.grpid, dupobs_df$sold, useNA="ifany"))
names(dupgrps_df)[c(1,2)] <- c(".grpid", "sold")
# table() turned the ids into factor levels; restore them as numbers
dupgrps_df$.grpid <- as.numeric(as.character(dupgrps_df$.grpid))
# Long -> wide: one column per sold value, then prefix those columns with "sold."
dupgrps_df <- tidyr::spread(dupgrps_df, sold, Freq)
names(dupgrps_df)[-1] <- paste("sold", names(dupgrps_df)[-1], sep=".")
# Group size = sum of the sold.* counts in each row. rowSums() is vectorised
# and, unlike sapply(1:nrow(...)), does not iterate over rows 1 and 0 when the
# frame is empty. as.integer() keeps the integer type that sum() produced and
# drops the row names rowSums() attaches.
dupgrps_df$.freq <- as.integer(rowSums(dupgrps_df[, -1, drop=FALSE]))
myprint_df(orderBy(~-.freq, dupgrps_df))
## .grpid sold.0 sold.1 sold.NA .freq
## 40 40 0 6 3 9
## 106 106 0 4 1 5
## 9 9 0 1 3 4
## 17 17 0 3 1 4
## 36 36 0 3 1 4
## 53 53 0 2 2 4
## .grpid sold.0 sold.1 sold.NA .freq
## 10 10 0 2 0 2
## 42 42 0 1 1 2
## 57 57 1 0 1 2
## 66 66 1 0 1 2
## 91 91 0 1 1 2
## 101 101 0 1 1 2
## .grpid sold.0 sold.1 sold.NA .freq
## 130 130 1 0 1 2
## 131 131 1 1 0 2
## 132 132 0 1 1 2
## 133 133 2 0 0 2
## 134 134 0 1 1 2
## 135 135 2 0 0 2
print("sold Conflicts:")
## [1] "sold Conflicts:"
print(subset(dupgrps_df, (sold.0 > 0) & (sold.1 > 0)))
## .grpid sold.0 sold.1 sold.NA .freq
## 4 4 1 1 0 2
## 22 22 1 1 0 2
## 23 23 1 1 0 2
## 74 74 1 1 0 2
## 83 83 1 1 0 2
## 84 84 1 1 0 2
## 95 95 1 1 0 2
## 102 102 1 1 0 2
## 109 109 1 1 0 2
## 111 111 1 1 0 2
## 122 122 1 1 0 2
## 131 131 1 1 0 2
#dupobs_df[dupobs_df$.grpid == 4, ]
if (nrow(subset(dupgrps_df, (sold.0 > 0) & (sold.1 > 0) & (sold.0 != sold.1))) > 0)
stop("Duplicate conflicts are resolvable")
print("Test & Train Groups:")
## [1] "Test & Train Groups:"
print(subset(dupgrps_df, (sold.NA > 0)))
## .grpid sold.0 sold.1 sold.NA .freq
## 1 1 0 1 1 2
## 5 5 1 0 1 2
## 7 7 0 0 2 2
## 8 8 1 0 1 2
## 9 9 0 1 3 4
## 12 12 0 0 2 2
## 14 14 0 1 1 2
## 15 15 0 0 2 2
## 17 17 0 3 1 4
## 18 18 0 2 1 3
## 19 19 0 2 1 3
## 24 24 0 2 1 3
## 26 26 1 0 1 2
## 28 28 1 0 1 2
## 30 30 0 1 1 2
## 32 32 0 0 2 2
## 33 33 0 1 1 2
## 35 35 0 2 1 3
## 36 36 0 3 1 4
## 37 37 0 0 2 2
## 38 38 0 1 1 2
## 40 40 0 6 3 9
## 41 41 0 0 2 2
## 42 42 0 1 1 2
## 43 43 0 1 1 2
## 44 44 0 2 1 3
## 47 47 0 1 1 2
## 48 48 0 0 2 2
## 49 49 0 1 2 3
## 51 51 0 1 1 2
## 53 53 0 2 2 4
## 54 54 0 1 1 2
## 55 55 1 0 2 3
## 56 56 1 0 1 2
## 57 57 1 0 1 2
## 58 58 0 0 2 2
## 59 59 1 0 1 2
## 60 60 1 0 1 2
## 63 63 0 1 1 2
## 66 66 1 0 1 2
## 67 67 1 0 1 2
## 68 68 0 0 2 2
## 69 69 1 0 1 2
## 73 73 0 1 1 2
## 76 76 0 2 1 3
## 86 86 0 0 2 2
## 87 87 1 0 1 2
## 89 89 1 0 1 2
## 90 90 0 0 2 2
## 91 91 0 1 1 2
## 93 93 0 1 1 2
## 94 94 1 0 1 2
## 99 99 0 1 1 2
## 101 101 0 1 1 2
## 103 103 0 1 1 2
## 104 104 1 0 1 2
## 106 106 0 4 1 5
## 107 107 0 1 1 2
## 108 108 0 1 1 2
## 112 112 1 0 1 2
## 114 114 0 1 1 2
## 115 115 0 1 1 2
## 116 116 1 0 1 2
## 117 117 0 2 1 3
## 118 118 0 1 1 2
## 121 121 1 0 1 2
## 124 124 1 0 1 2
## 128 128 0 1 1 2
## 130 130 1 0 1 2
## 132 132 0 1 1 2
## 134 134 0 1 1 2
glb_allobs_df <- merge(glb_allobs_df, dupobs_df[, c(glb_id_var, ".grpid")],
by=glb_id_var, all.x=TRUE)
glb_exclude_vars_as_features <- c(".grpid", glb_exclude_vars_as_features)
# !_sp
spd_allobs_df <- read.csv(paste0(glb_out_pfx, "sp_predict.csv"))
if (nrow(spd_allobs_df) != nrow(glb_allobs_df))
stop("mismatches between spd_allobs_df & glb_allobs_df")
mrg_allobs_df <- merge(glb_allobs_df, spd_allobs_df)
if (nrow(mrg_allobs_df) != nrow(glb_allobs_df))
stop("mismatches between mrg_allobs_df & glb_allobs_df")
mrg_allobs_df$startprice.diff <- mrg_allobs_df$startprice -
mrg_allobs_df$startprice.predict.
print(myplot_scatter(mrg_allobs_df, "startprice", "startprice.diff",
colorcol_name = "biddable"))
## Warning in myplot_scatter(mrg_allobs_df, "startprice", "startprice.diff", :
## converting biddable to class:factor
print(myplot_histogram(mrg_allobs_df, "startprice.diff",
fill_col_name = "biddable"))
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
glb_allobs_df <- mrg_allobs_df
glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features,
"startprice.log", "startprice.predict.")
###
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Only for _sp
# print(table(glb_allobs_df$sold, glb_allobs_df$.src, useNA = "ifany"))
# print(table(glb_allobs_df$sold, glb_allobs_df$biddable, glb_allobs_df$.src,
# useNA = "ifany"))
# glb_allobs_df$.src <- "Test"
# glb_allobs_df[!is.na(glb_allobs_df$sold) & (glb_allobs_df$sold == 1), ".src"] <- "Train"
# print(table(glb_allobs_df$sold, glb_allobs_df$.src, useNA = "ifany"))
# print(table(glb_allobs_df$sold, glb_allobs_df$biddable, glb_allobs_df$.src,
# useNA = "ifany"))
###
glb_chunks_df <- myadd_chunk(glb_chunks_df, "inspect.data", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 1 import.data 1 0 8.653 12.56 3.907
## 2 inspect.data 2 0 12.560 NA NA
2.0: inspect data
#print(str(glb_allobs_df))
#View(glb_allobs_df)
# Print the distribution of `var` within each data source (.src) of
# glb_allobs_df: first the counts, then each row divided by its row total.
# Returns (invisibly, via print) the table of proportions.
dsp_class_dstrb <- function(var) {
    counts_df <- mycreate_xtab_df(glb_allobs_df, c(".src", var))

    # Use the source label as the row name and keep only the count columns
    rownames(counts_df) <- counts_df$.src
    counts_df <- counts_df[, names(counts_df) != ".src", drop=FALSE]
    print(counts_df)

    # NA cells are ignored when computing the row totals
    row_totals <- rowSums(counts_df, na.rm=TRUE)
    print(counts_df / row_totals)
}
# Performed repeatedly in other chunks
# Data health check on glb_allobs_df: response histogram by .src, class
# distribution (classification only), then mycheck_problem_data().
glb_chk_data <- function() {
    # Histogram of the raw response variable, one panel per data source
    rsp_hist <- myplot_histogram(glb_allobs_df, glb_rsp_var_raw) +
        facet_wrap(~ .src)
    print(rsp_hist)

    if (glb_is_classification) {
        # Use the mapped response once that column exists, else the raw one
        dstrb_var <- ifelse(glb_rsp_var %in% names(glb_allobs_df),
                            glb_rsp_var, glb_rsp_var_raw)
        dsp_class_dstrb(var=dstrb_var)
    }

    mycheck_problem_data(glb_allobs_df)
}
glb_chk_data()
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Loading required package: reshape2
## sold.0 sold.1 sold.NA
## Test NA NA 798
## Train 999 860 NA
## sold.0 sold.1 sold.NA
## Test NA NA 1
## Train 0.5373857 0.4626143 NA
## [1] "numeric data missing in : "
## sold
## 798
## [1] "numeric data w/ 0s in : "
## biddable sold
## 1444 999
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description condition cellular carrier color storage
## 1520 0 0 0 0 0
## productline .grpid
## 0 NA
# Create new features that help diagnostics
if (!is.null(glb_map_rsp_raw_to_var)) {
glb_allobs_df[, glb_rsp_var] <-
glb_map_rsp_raw_to_var(glb_allobs_df[, glb_rsp_var_raw])
mycheck_map_results(mapd_df=glb_allobs_df,
from_col_name=glb_rsp_var_raw, to_col_name=glb_rsp_var)
if (glb_is_classification) dsp_class_dstrb(glb_rsp_var)
}
## Loading required package: sqldf
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
## Loading required package: DBI
## Loading required package: tcltk
## sold sold.fctr .n
## 1 0 N 999
## 2 1 Y 860
## 3 NA <NA> 798
## Warning: Removed 1 rows containing missing values (position_stack).
## sold.fctr.N sold.fctr.Y sold.fctr.NA
## Test NA NA 798
## Train 999 860 NA
## sold.fctr.N sold.fctr.Y sold.fctr.NA
## Test NA NA 1
## Train 0.5373857 0.4626143 NA
# check distribution of all numeric data
# For each feature name in feats_vctr, print its name and a plot of the feature
# against the response in glb_allobs_df: a smoothed scatter plot for
# regression, a box plot for classification. Factor features are additionally
# faceted by their own levels.
dsp_numeric_feats_dstrb <- function(feats_vctr) {
    for (feat_name in feats_vctr) {
        print(sprintf("feat: %s", feat_name))

        if (glb_is_regression) {
            feat_plot <- myplot_scatter(df=glb_allobs_df, ycol_name=glb_rsp_var,
                                        xcol_name=feat_name, smooth=TRUE)
        }
        if (glb_is_classification) {
            feat_plot <- myplot_box(df=glb_allobs_df, ycol_names=feat_name,
                                    xcol_name=glb_rsp_var)
        }

        if (is.factor(glb_allobs_df[, feat_name])) {
            feat_plot <- feat_plot + facet_wrap(reformulate(feat_name))
        }

        print(feat_plot)
    }
}
# dsp_numeric_vars_dstrb(setdiff(names(glb_allobs_df),
# union(myfind_chr_cols_df(glb_allobs_df),
# c(glb_rsp_var_raw, glb_rsp_var))))
# Add diagnostic feature columns to a data frame of observations.
# As written, the only column added is .rnorm, a standard-normal random draw
# per row; the commented lines inside mutate() are templates for further
# derived columns.
#   obs_df: data frame to augment
#   ref_df: defaults to glb_allobs_df; not referenced in the body below
# Returns obs_df with the added column(s).
add_new_diag_feats <- function(obs_df, ref_df=glb_allobs_df) {
# NOTE(review): mutate() resolves to whichever attached package masks it
# (plyr here; dplyr also exports mutate) - confirm the intended one if the
# load order changes.
require(plyr)
# Fixed seed so the .rnorm draws are reproducible across runs
set.seed(169)
obs_df <- mutate(obs_df,
# <col_name>.NA=is.na(<col_name>),
# <col_name>.fctr=factor(<col_name>,
# as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))),
# <col_name>.fctr=relevel(factor(<col_name>,
# as.factor(union(obs_df$<col_name>, obs_twin_df$<col_name>))),
# "<ref_val>"),
# <col2_name>.fctr=relevel(factor(ifelse(<col1_name> == <val>, "<oth_val>", "<ref_val>")),
# as.factor(c("R", "<ref_val>")),
# ref="<ref_val>"),
# This doesn't work - use sapply instead
# <col_name>.fctr_num=grep(<col_name>, levels(<col_name>.fctr)),
#
# Date.my=as.Date(strptime(Date, "%m/%d/%y %H:%M")),
# Year=year(Date.my),
# Month=months(Date.my),
# Weekday=weekdays(Date.my)
# <col_name>=<table>[as.character(<col2_name>)],
# <col_name>=as.numeric(<col2_name>),
# <col_name> = trunc(<col2_name> / 100),
# Random-noise column, one N(0, 1) draw per observation
.rnorm = rnorm(n=nrow(obs_df))
)
# If levels of a factor are different across obs_df & glb_newobs_df; predict.glm fails
# Transformations not handled by mutate
# obs_df$<col_name>.fctr.num <- sapply(1:nrow(obs_df),
# function(row_ix) grep(obs_df[row_ix, "<col_name>"],
# levels(obs_df[row_ix, "<col_name>.fctr"])))
#print(summary(obs_df))
#print(sapply(names(obs_df), function(col) sum(is.na(obs_df[, col]))))
return(obs_df)
}
glb_allobs_df <- add_new_diag_feats(glb_allobs_df)
## Loading required package: plyr
require(dplyr)
## Loading required package: dplyr
##
## Attaching package: 'dplyr'
##
## The following objects are masked from 'package:plyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following objects are masked from 'package:gdata':
##
## combine, first, last
##
## The following objects are masked from 'package:stats':
##
## filter, lag
##
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#stop(here"); sav_allobs_df <- glb_allobs_df # glb_allobs_df <- sav_allobs_df
# Merge some <descriptor>
# glb_allobs_df$<descriptor>.my <- glb_allobs_df$<descriptor>
# glb_allobs_df[grepl("\\bAIRPORT\\b", glb_allobs_df$<descriptor>.my),
# "<descriptor>.my"] <- "AIRPORT"
# glb_allobs_df$<descriptor>.my <-
# plyr::revalue(glb_allobs_df$<descriptor>.my, c(
# "ABANDONED BUILDING" = "OTHER",
# "##" = "##"
# ))
# print(<descriptor>_freq_df <- mycreate_sqlxtab_df(glb_allobs_df, c("<descriptor>.my")))
# # print(dplyr::filter(<descriptor>_freq_df, grepl("(MEDICAL|DENTAL|OFFICE)", <descriptor>.my)))
# # print(dplyr::filter(dplyr::select(glb_allobs_df, -<var.zoo>),
# # grepl("STORE", <descriptor>.my)))
# glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, "<descriptor>")
# Check distributions of newly transformed / extracted vars
# Enhancement: remove vars that were displayed earlier
dsp_numeric_feats_dstrb(feats_vctr=setdiff(names(glb_allobs_df),
c(myfind_chr_cols_df(glb_allobs_df), glb_rsp_var_raw, glb_rsp_var,
glb_exclude_vars_as_features)))
## [1] "feat: biddable"
## [1] "feat: startprice.diff"
## [1] "feat: .rnorm"
# Convert factors to dummy variables
# Build splines require(splines); bsBasis <- bs(training$age, df=3)
#pairs(subset(glb_trnobs_df, select=-c(col_symbol)))
# Check for glb_newobs_df & glb_trnobs_df features range mismatches
# Other diagnostics:
# print(subset(glb_trnobs_df, <col1_name> == max(glb_trnobs_df$<col1_name>, na.rm=TRUE) &
# <col2_name> <= mean(glb_trnobs_df$<col1_name>, na.rm=TRUE)))
# print(glb_trnobs_df[which.max(glb_trnobs_df$<col_name>),])
# print(<col_name>_freq_glb_trnobs_df <- mycreate_tbl_df(glb_trnobs_df, "<col_name>"))
# print(which.min(table(glb_trnobs_df$<col_name>)))
# print(which.max(table(glb_trnobs_df$<col_name>)))
# print(which.max(table(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>)[, 2]))
# print(table(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>))
# print(table(is.na(glb_trnobs_df$<col1_name>), glb_trnobs_df$<col2_name>))
# print(table(sign(glb_trnobs_df$<col1_name>), glb_trnobs_df$<col2_name>))
# print(mycreate_xtab_df(glb_trnobs_df, <col1_name>))
# print(mycreate_xtab_df(glb_trnobs_df, c(<col1_name>, <col2_name>)))
# print(<col1_name>_<col2_name>_xtab_glb_trnobs_df <-
# mycreate_xtab_df(glb_trnobs_df, c("<col1_name>", "<col2_name>")))
# <col1_name>_<col2_name>_xtab_glb_trnobs_df[is.na(<col1_name>_<col2_name>_xtab_glb_trnobs_df)] <- 0
# print(<col1_name>_<col2_name>_xtab_glb_trnobs_df <-
# mutate(<col1_name>_<col2_name>_xtab_glb_trnobs_df,
# <col3_name>=(<col1_name> * 1.0) / (<col1_name> + <col2_name>)))
# print(mycreate_sqlxtab_df(glb_allobs_df, c("<col1_name>", "<col2_name>")))
# print(<col2_name>_min_entity_arr <-
# sort(tapply(glb_trnobs_df$<col1_name>, glb_trnobs_df$<col2_name>, min, na.rm=TRUE)))
# print(<col1_name>_na_by_<col2_name>_arr <-
# sort(tapply(glb_trnobs_df$<col1_name>.NA, glb_trnobs_df$<col2_name>, mean, na.rm=TRUE)))
# Other plots:
# print(myplot_box(df=glb_trnobs_df, ycol_names="<col1_name>"))
# print(myplot_box(df=glb_trnobs_df, ycol_names="<col1_name>", xcol_name="<col2_name>"))
# print(myplot_line(subset(glb_trnobs_df, Symbol %in% c("CocaCola", "ProcterGamble")),
# "Date.POSIX", "StockPrice", facet_row_colnames="Symbol") +
# geom_vline(xintercept=as.numeric(as.POSIXlt("2003-03-01"))) +
# geom_vline(xintercept=as.numeric(as.POSIXlt("1983-01-01")))
# )
# print(myplot_line(subset(glb_trnobs_df, Date.POSIX > as.POSIXct("2004-01-01")),
# "Date.POSIX", "StockPrice") +
# geom_line(aes(color=Symbol)) +
# coord_cartesian(xlim=c(as.POSIXct("1990-01-01"),
# as.POSIXct("2000-01-01"))) +
# coord_cartesian(ylim=c(0, 250)) +
# geom_vline(xintercept=as.numeric(as.POSIXlt("1997-09-01"))) +
# geom_vline(xintercept=as.numeric(as.POSIXlt("1997-11-01")))
# )
# print(myplot_scatter(glb_allobs_df, "<col1_name>", "<col2_name>", smooth=TRUE))
# print(myplot_scatter(glb_allobs_df, "<col1_name>", "<col2_name>", colorcol_name="<Pred.fctr>") +
# geom_point(data=subset(glb_allobs_df, <condition>),
# mapping=aes(x=<x_var>, y=<y_var>), color="red", shape=4, size=5) +
# geom_vline(xintercept=84))
glb_chunks_df <- myadd_chunk(glb_chunks_df, "scrub.data", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 2 inspect.data 2 0 12.56 16.48 3.92
## 3 scrub.data 2 1 16.48 NA NA
2.1: scrub data
mycheck_problem_data(glb_allobs_df)
## [1] "numeric data missing in : "
## sold sold.fctr
## 798 798
## [1] "numeric data w/ 0s in : "
## biddable sold
## 1444 999
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description condition cellular carrier color storage
## 1520 0 0 0 0 0
## productline .grpid
## 0 NA
# Debugging aid for strings with bad (multibyte) characters: prints x, then
# extracts it one character position at a time, from 1 to maxStringLength.
# Nothing is reported per position unless the print() inside the loop is
# uncommented. Returns NULL invisibly (the value of the for loop).
findOffendingCharacter <- function(x, maxStringLength=256){
    print(x)
    for (pos in 1:maxStringLength) {
        currentChar <- substr(x, start=pos, stop=pos)
        #print(currentChar) #uncomment if you want the indiv characters printed
        #the next character is the offending multibyte Character
    }
}
# string_vector <- c("test", "Se\x96ora", "works fine")
# lapply(string_vector, findOffendingCharacter)
# lapply(glb_allobs_df$description[29], findOffendingCharacter)
# Print a cross-tab of Headline.pfx, Headline and the response over the
# observations of glb_allobs_df selected by sel_obs(Headline.contains=str).
dsp_hdlxtab <- function(str) {
    matched_obs_df <- glb_allobs_df[sel_obs(Headline.contains=str), ]
    xtab_cols <- c("Headline.pfx", "Headline", glb_rsp_var)
    print(mycreate_sqlxtab_df(matched_obs_df, xtab_cols))
}
#dsp_hdlxtab("(1914)|(1939)")
dsp_catxtab <- function(str) {
    # Prints a cross-tab (via mycreate_sqlxtab_df) of the category columns and
    # the response variable for the observations picked by
    # sel_obs(Headline.contains=str).
    hdl_obs_df <- glb_allobs_df[sel_obs(Headline.contains=str), ]
    xtab_cols <- c("Headline.pfx", "NewsDesk", "SectionName", "SubsectionName",
                   glb_rsp_var)
    print(mycreate_sqlxtab_df(hdl_obs_df, xtab_cols))
}
# dsp_catxtab("1914)|(1939)")
# dsp_catxtab("19(14|39|64):")
# dsp_catxtab("19..:")
# Merge some categories
# glb_allobs_df$myCategory <-
# plyr::revalue(glb_allobs_df$myCategory, c(
# "#Business Day#Dealbook" = "Business#Business Day#Dealbook",
# "#Business Day#Small Business" = "Business#Business Day#Small Business",
# "dummy" = "dummy"
# ))
# ctgry_xtab_df <- orderBy(reformulate(c("-", ".n")),
# mycreate_sqlxtab_df(glb_allobs_df,
# c("myCategory", "NewsDesk", "SectionName", "SubsectionName", glb_rsp_var)))
# myprint_df(ctgry_xtab_df)
# write.table(ctgry_xtab_df, paste0(glb_out_pfx, "ctgry_xtab.csv"),
# row.names=FALSE)
# ctgry_cast_df <- orderBy(~ -Y -NA, dcast(ctgry_xtab_df,
# myCategory + NewsDesk + SectionName + SubsectionName ~
# Popular.fctr, sum, value.var=".n"))
# myprint_df(ctgry_cast_df)
# write.table(ctgry_cast_df, paste0(glb_out_pfx, "ctgry_cast.csv"),
# row.names=FALSE)
# print(ctgry_sum_tbl <- table(glb_allobs_df$myCategory, glb_allobs_df[, glb_rsp_var],
# useNA="ifany"))
# Chi-squared test of association between membership in a selected subset of
# observations and the "Popular" column of glb_allobs_df.
#   ...: selection criteria, passed through unchanged to sel_obs().
# Prints the 2-way table (rows: .marker, cols: Popular) and the chisq.test()
# result.
# NOTE(review): "Popular" is hard-coded instead of being derived from
#   glb_rsp_var; presumably a leftover from another dataset - confirm the
#   column exists in glb_allobs_df before calling this.
dsp_chisq.test <- function(...) {
# Selected observations that have a non-missing Popular value
sel_df <- glb_allobs_df[sel_obs(...) &
!is.na(glb_allobs_df$Popular), ]
sel_df$.marker <- 1
# Reference set: every observation with a non-missing Popular value
ref_df <- glb_allobs_df[!is.na(glb_allobs_df$Popular), ]
# Left join keeps all reference rows; rows not selected get .marker = NA ...
mrg_df <- merge(ref_df[, c(glb_id_var, "Popular")],
sel_df[, c(glb_id_var, ".marker")], all.x=TRUE)
# ... which is then turned into 0 (every NA in mrg_df is replaced by 0)
mrg_df[is.na(mrg_df)] <- 0
print(mrg_tbl <- table(mrg_df$.marker, mrg_df$Popular))
print("Rows:Selected; Cols:Popular")
#print(mrg_tbl)
print(chisq.test(mrg_tbl))
}
# dsp_chisq.test(Headline.contains="[Ee]bola")
# dsp_chisq.test(Snippet.contains="[Ee]bola")
# dsp_chisq.test(Abstract.contains="[Ee]bola")
# print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains="[Ee]bola"), ],
# c(glb_rsp_var, "NewsDesk", "SectionName", "SubsectionName")))
# print(table(glb_allobs_df$NewsDesk, glb_allobs_df$SectionName))
# print(table(glb_allobs_df$SectionName, glb_allobs_df$SubsectionName))
# print(table(glb_allobs_df$NewsDesk, glb_allobs_df$SectionName, glb_allobs_df$SubsectionName))
# glb_allobs_df$myCategory.fctr <- as.factor(glb_allobs_df$myCategory)
# glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features,
# c("myCategory", "NewsDesk", "SectionName", "SubsectionName"))
print(table(glb_allobs_df$cellular, glb_allobs_df$carrier, useNA="ifany"))
##
## AT&T None Other Sprint T-Mobile Unknown Verizon
## 0 0 1593 0 0 0 0 0
## 1 288 0 4 36 28 172 196
## Unknown 4 4 2 0 0 330 0
# glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) &
# (glb_allobs_df$carrier %in% c("AT&T", "Other")),
# c(glb_id_var, glb_rsp_var_raw, "description", "carrier", "cellular")]
# cellular == "Unknown" but a carrier of "AT&T" or "Other" is recorded:
# set cellular to "1"
glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) &
(glb_allobs_df$carrier %in% c("AT&T", "Other")),
"cellular"] <- "1"
# glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) &
# (glb_allobs_df$carrier %in% c("None")),
# c(glb_id_var, glb_rsp_var_raw, "description", "carrier", "cellular")]
# cellular == "Unknown" but carrier is "None": set cellular to "0"
glb_allobs_df[(glb_allobs_df$cellular %in% c("Unknown")) &
(glb_allobs_df$carrier %in% c("None")),
"cellular"] <- "0"
print(table(glb_allobs_df$cellular, glb_allobs_df$carrier, useNA="ifany"))
##
## AT&T None Other Sprint T-Mobile Unknown Verizon
## 0 0 1597 0 0 0 0 0
## 1 292 0 6 36 28 172 196
## Unknown 0 0 0 0 0 330 0
2.1: scrub data
glb_chunks_df <- myadd_chunk(glb_chunks_df, "transform.data", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 3 scrub.data 2 1 16.480 17.229 0.749
## 4 transform.data 2 2 17.229 NA NA
### Mapping dictionary
#sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# For every feature listed in glb_map_vars: import its mapping table from
# glb_map_urls[[feat]] and apply it to glb_allobs_df through mymap_codes().
# The first column of the imported table is passed as the join column and the
# second column as both the new feature name and the mapping target column.
# The original (un-mapped) features are then excluded from the feature set.
if (!is.null(glb_map_vars)) {
for (feat in glb_map_vars) {
map_df <- myimport_data(url=glb_map_urls[[feat]],
comment="map_df",
print_diagn=TRUE)
glb_allobs_df <- mymap_codes(glb_allobs_df, feat, names(map_df)[2],
map_df, map_join_col_name=names(map_df)[1],
map_tgt_col_name=names(map_df)[2])
}
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_map_vars)
}
### Forced Assignments
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# For every feature in glb_assign_vars, create "<feat>.my" as a copy of the
# feature and overwrite selected values according to
# glb_assign_pairs_lst[[feat]], a list with parallel vectors `from` and `to`.
# A `from` of NA targets the missing values of the feature. The original
# features are excluded from the feature set afterwards.
for (feat in glb_assign_vars) {
    new_feat <- paste0(feat, ".my")
    print(sprintf("Forced Assignments for: %s -> %s...", feat, new_feat))
    glb_allobs_df[, new_feat] <- glb_allobs_df[, feat]

    pairs <- glb_assign_pairs_lst[[feat]]
    # seq_along() so that an empty `from` vector performs no iterations
    # (1:length(x) would iterate over c(1, 0))
    for (pair_ix in seq_along(pairs$from)) {
        from_val <- pairs$from[pair_ix]
        to_val   <- pairs$to[pair_ix]

        if (is.na(from_val) && is.na(to_val))
            stop("what are you trying to do ???")

        # Logical row selector without NAs: `==` against a column containing
        # NAs yields NA, which breaks both sum() and the subscripted
        # assignment on a data frame
        if (is.na(from_val))
            sel_rows <- is.na(glb_allobs_df[, feat]) else
            sel_rows <- !is.na(glb_allobs_df[, feat]) &
                        (glb_allobs_df[, feat] == from_val)

        nobs <- sum(sel_rows)
        glb_allobs_df[sel_rows, new_feat] <- to_val

        print(sprintf("    %s -> %s for %s obs",
                      pairs$from[pair_ix], pairs$to[pair_ix], format(nobs, big.mark=",")))
    }

    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, glb_assign_vars)
}
### Derivations using mapping functions
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Build each derived feature named in glb_derive_vars. For a feature,
# glb_derive_lst[[new_feat]]$args lists the glb_allobs_df columns that are
# passed (as arguments named after the columns) to
# glb_derive_lst[[new_feat]]$mapfn; the result is stored as the new column.
for (new_feat in glb_derive_vars) {
print(sprintf("Creating new feature: %s...", new_feat))
# Collect the input columns into a named argument list for do.call()
args_lst <- NULL
for (arg in glb_derive_lst[[new_feat]]$args)
args_lst[[arg]] <- glb_allobs_df[, arg]
glb_allobs_df[, new_feat] <- do.call(glb_derive_lst[[new_feat]]$mapfn, args_lst)
}
## [1] "Creating new feature: idseq.my..."
## [1] "Creating new feature: prdline.my..."
## [1] "Creating new feature: startprice.log..."
## [1] "Creating new feature: descr.my..."
#stop(here")
#hex_vctr <- c("\n", "\211", "\235", "\317", "\333")
# Alternation pattern matching a newline or any of the listed octal-escaped
# bytes
hex_regex <- paste0(c("\n", "\211", "\235", "\317", "\333"), collapse="|")
# Replace those characters by a space in descr.my, but only for the listed
# UniqueID values.
# NOTE(review): the ids are hard-coded; presumably they are the observations
#   found to contain these characters - confirm when the data changes.
for (obs_id in c(10029, 10948, 10136, 10178, 11514, 11904, 12157, 12210, 12659)) {
# tmp_str <- unlist(strsplit(glb_allobs_df[row_pos, "descr.my"], ""))
# glb_allobs_df[row_pos, "descr.my"] <- paste0(tmp_str[!tmp_str %in% hex_vctr],
# collapse="")
row_pos <- which(glb_allobs_df$UniqueID == obs_id)
glb_allobs_df[row_pos, "descr.my"] <-
gsub(hex_regex, " ", glb_allobs_df[row_pos, "descr.my"])
}
2.2: transform data
#```{r extract_features, cache=FALSE, eval=!is.null(glb_txt_vars)}
glb_chunks_df <- myadd_chunk(glb_chunks_df, "extract.features", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 4 transform.data 2 2 17.229 17.867 0.638
## 5 extract.features 3 0 17.868 NA NA
extract.features_chunk_df <- myadd_chunk(NULL, "extract.features_bgn")
## label step_major step_minor bgn end elapsed
## 1 extract.features_bgn 1 0 17.874 NA NA
# Options:
# Select Tf, log(1 + Tf), Tf-IDF or BM25Tf-IDf
# Create new features that help prediction
# <col_name>.lag.2 <- lag(zoo(glb_trnobs_df$<col_name>), -2, na.pad=TRUE)
# glb_trnobs_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
# <col_name>.lag.2 <- lag(zoo(glb_newobs_df$<col_name>), -2, na.pad=TRUE)
# glb_newobs_df[, "<col_name>.lag.2"] <- coredata(<col_name>.lag.2)
#
# glb_newobs_df[1, "<col_name>.lag.2"] <- glb_trnobs_df[nrow(glb_trnobs_df) - 1,
# "<col_name>"]
# glb_newobs_df[2, "<col_name>.lag.2"] <- glb_trnobs_df[nrow(glb_trnobs_df),
# "<col_name>"]
# glb_allobs_df <- mutate(glb_allobs_df,
# A.P.http=ifelse(grepl("http",Added,fixed=TRUE), 1, 0)
# )
#
# glb_trnobs_df <- mutate(glb_trnobs_df,
# )
#
# glb_newobs_df <- mutate(glb_newobs_df,
# )
# Convert dates to numbers
# typically, dates come in as chars;
# so this must be done before converting chars to factors
#stop(here"); sav_allobs_df <- glb_allobs_df #; glb_allobs_df <- sav_allobs_df
# Date-derived features; runs only when glb_date_vars is set.
#  1. Binds the columns returned by myextract_dates_df() to glb_allobs_df and
#     excludes the raw date columns and their ".POSIX" versions from the
#     feature set.
#  2. For each date variable, sorts the observations by "<feat>.POSIX", stores
#     the numeric timestamp as "<feat>.zoo" and creates
#     "<feat>.last{1,2,10,100}.log" = log(1 + gap), where gap is
#     z - lag(z, -k) with missing gaps set to 0.
#     NOTE(review): with zoo's lag() sign convention this is presumably the
#     time elapsed since the k-th previous observation - confirm.
#  3. Restores the ordering by glb_id_var and excludes "<feat>.zoo".
# NOTE(review): assumes myextract_dates_df() creates "<feat>.POSIX" and
#   "<feat>.wkend" columns, since both are referenced below - confirm.
if (!is.null(glb_date_vars)) {
glb_allobs_df <- cbind(glb_allobs_df,
myextract_dates_df(df=glb_allobs_df, vars=glb_date_vars,
id_vars=glb_id_var, rsp_var=glb_rsp_var))
for (sfx in c("", ".POSIX"))
glb_exclude_vars_as_features <-
union(glb_exclude_vars_as_features,
paste(glb_date_vars, sfx, sep=""))
for (feat in glb_date_vars) {
# Sort chronologically so that lagged differences are gaps between
# consecutive observations
glb_allobs_df <- orderBy(reformulate(paste0(feat, ".POSIX")), glb_allobs_df)
# print(myplot_scatter(glb_allobs_df, xcol_name=paste0(feat, ".POSIX"),
# ycol_name=glb_rsp_var, colorcol_name=glb_rsp_var))
# Plot only observations on or after the hard-coded date 2012-12-01
print(myplot_scatter(glb_allobs_df[glb_allobs_df[, paste0(feat, ".POSIX")] >=
strptime("2012-12-01", "%Y-%m-%d"), ],
xcol_name=paste0(feat, ".POSIX"),
ycol_name=glb_rsp_var, colorcol_name=paste0(feat, ".wkend")))
# Create features that measure the gap between previous timestamp in the data
require(zoo)
z <- zoo(as.numeric(as.POSIXlt(glb_allobs_df[, paste0(feat, ".POSIX")])))
glb_allobs_df[, paste0(feat, ".zoo")] <- z
print(head(glb_allobs_df[, c(glb_id_var, feat, paste0(feat, ".zoo"))]))
print(myplot_scatter(glb_allobs_df[glb_allobs_df[, paste0(feat, ".POSIX")] >
strptime("2012-10-01", "%Y-%m-%d"), ],
xcol_name=paste0(feat, ".zoo"), ycol_name=glb_rsp_var,
colorcol_name=glb_rsp_var))
# b: data-less zoo series indexed 1..nrow; merging with it pads the lagged
# difference back to one value per observation (NA where no lag exists)
b <- zoo(, seq(nrow(glb_allobs_df)))
last1 <- as.numeric(merge(z-lag(z, -1), b, all=TRUE)); last1[is.na(last1)] <- 0
glb_allobs_df[, paste0(feat, ".last1.log")] <- log(1 + last1)
print(gp <- myplot_box(df=glb_allobs_df[glb_allobs_df[,
paste0(feat, ".last1.log")] > 0, ],
ycol_names=paste0(feat, ".last1.log"),
xcol_name=glb_rsp_var))
last2 <- as.numeric(merge(z-lag(z, -2), b, all=TRUE)); last2[is.na(last2)] <- 0
glb_allobs_df[, paste0(feat, ".last2.log")] <- log(1 + last2)
print(gp <- myplot_box(df=glb_allobs_df[glb_allobs_df[,
paste0(feat, ".last2.log")] > 0, ],
ycol_names=paste0(feat, ".last2.log"),
xcol_name=glb_rsp_var))
last10 <- as.numeric(merge(z-lag(z, -10), b, all=TRUE)); last10[is.na(last10)] <- 0
glb_allobs_df[, paste0(feat, ".last10.log")] <- log(1 + last10)
print(gp <- myplot_box(df=glb_allobs_df[glb_allobs_df[,
paste0(feat, ".last10.log")] > 0, ],
ycol_names=paste0(feat, ".last10.log"),
xcol_name=glb_rsp_var))
last100 <- as.numeric(merge(z-lag(z, -100), b, all=TRUE)); last100[is.na(last100)] <- 0
glb_allobs_df[, paste0(feat, ".last100.log")] <- log(1 + last100)
print(gp <- myplot_box(df=glb_allobs_df[glb_allobs_df[,
paste0(feat, ".last100.log")] > 0, ],
ycol_names=paste0(feat, ".last100.log"),
xcol_name=glb_rsp_var))
# Back to id order; the raw numeric timestamp is not used as a feature
glb_allobs_df <- orderBy(reformulate(glb_id_var), glb_allobs_df)
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features,
c(paste0(feat, ".zoo")))
# all2$last3 = as.numeric(merge(z-lag(z, -3), b, all = TRUE))
# all2$last5 = as.numeric(merge(z-lag(z, -5), b, all = TRUE))
# all2$last10 = as.numeric(merge(z-lag(z, -10), b, all = TRUE))
# all2$last20 = as.numeric(merge(z-lag(z, -20), b, all = TRUE))
# all2$last50 = as.numeric(merge(z-lag(z, -50), b, all = TRUE))
#
#
# # order table
# all2 = all2[order(all2$id),]
#
# ## fill in NAs
# # count averages
# na.avg = all2 %>% group_by(weekend, hour) %>% dplyr::summarise(
# last1=mean(last1, na.rm=TRUE),
# last3=mean(last3, na.rm=TRUE),
# last5=mean(last5, na.rm=TRUE),
# last10=mean(last10, na.rm=TRUE),
# last20=mean(last20, na.rm=TRUE),
# last50=mean(last50, na.rm=TRUE)
# )
#
# # fill in averages
# na.merge = merge(all2, na.avg, by=c("weekend","hour"))
# na.merge = na.merge[order(na.merge$id),]
# for(i in c("last1", "last3", "last5", "last10", "last20", "last50")) {
# y = paste0(i, ".y")
# idx = is.na(all2[[i]])
# all2[idx,][[i]] <- na.merge[idx,][[y]]
# }
# rm(na.avg, na.merge, b, i, idx, n, pd, sec, sh, y, z)
}
}
# Drop the date-gap scratch vectors. They are only created when glb_date_vars
# is set, so remove just the ones that exist; an unconditional rm() emits an
# "object ... not found" warning for each missing name.
rm(list=intersect(c("last1", "last10", "last100"), ls()))
# Create factors of string variables
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "factorize.str.vars"), major.inc=TRUE)
## label step_major step_minor bgn end
## 1 extract.features_bgn 1 0 17.874 17.888
## 2 extract.features_factorize.str.vars 2 0 17.889 NA
## elapsed
## 1 0.014
## 2 NA
#stop(here"); sav_allobs_df <- glb_allobs_df; #glb_allobs_df <- sav_allobs_df
print(str_vars <- myfind_chr_cols_df(glb_allobs_df))
## description condition cellular carrier color
## "description" "condition" "cellular" "carrier" "color"
## storage productline .src .grpid prdline.my
## "storage" "productline" ".src" ".grpid" "prdline.my"
## descr.my
## "descr.my"
# Create a "<var>.fctr" factor for every character column that is neither
# excluded nor a free-text variable, then exclude the original string columns
# from the feature set. warning() is used to report each conversion.
if (length(str_vars <- setdiff(str_vars,
                               c(glb_exclude_vars_as_features, glb_txt_vars))) > 0) {
    for (var in str_vars) {
        warning("Creating factors of string variable: ", var,
                ": # of unique values: ", length(unique(glb_allobs_df[, var])))
        # Reference level = most frequent non-missing value. NA is never a
        # level of factor(), so it must not be a candidate: counting NAs
        # (useNA="ifany") makes relevel() fail when NA is the most frequent.
        glb_allobs_df[, paste0(var, ".fctr")] <-
            relevel(factor(glb_allobs_df[, var]),
                    names(which.max(table(glb_allobs_df[, var]))))
    }
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features, str_vars)
}
## Warning: Creating factors of string variable: condition: # of unique
## values: 6
## Warning: Creating factors of string variable: cellular: # of unique values:
## 3
## Warning: Creating factors of string variable: carrier: # of unique values:
## 7
## Warning: Creating factors of string variable: color: # of unique values: 5
## Warning: Creating factors of string variable: storage: # of unique values:
## 5
if (!is.null(glb_txt_vars)) {
require(foreach)
require(gsubfn)
require(stringr)
require(tm)
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "process.text"), major.inc=TRUE)
chk_pattern_freq <- function(rex_str, ignore.case=TRUE) {
    # Frequency table of every substring of txt_vctr (taken from the calling
    # environment, not an argument) that matches rex_str.
    #   rex_str:     regular expression, wrapped in stringr's regex()
    #   ignore.case: passed on as regex(ignore_case=)
    # Returns mycreate_sqlxtab_df() of the matches in a column named "pattern".
    ptn <- regex(rex_str, ignore_case=ignore.case)
    hits_mtrx <- str_extract_all(txt_vctr, ptn, simplify=TRUE)
    # simplify=TRUE pads with ""; keep only the real matches
    hits_df <- as.data.frame(hits_mtrx[hits_mtrx != ""])
    names(hits_df) <- "pattern"
    return(mycreate_sqlxtab_df(hits_df, "pattern"))
}
# match_lst <- gregexpr("\\bok(?!ay)", txt_vctr[746], ignore.case = FALSE, perl=TRUE); print(match_lst)
dsp_pattern <- function(rex_str, ignore.case=TRUE, print.all=TRUE) {
    # Finds all perl-regex matches of rex_str in each element of txt_vctr
    # (taken from the calling environment, not an argument).
    #   rex_str:     perl-compatible regular expression
    #   ignore.case: passed on to gregexpr()
    #   print.all:   print the resulting data frame before returning it
    # Returns a data frame with one row per element that had a match; column
    # `matches` holds that element's matches joined by "#".
    hit_pos_lst <- gregexpr(rex_str, txt_vctr, ignore.case = ignore.case, perl=TRUE)
    hit_str_lst <- regmatches(txt_vctr, hit_pos_lst)
    joined_hits <- sapply(hit_str_lst,
                          function (elems) paste(elems, collapse="#"))
    match_df <- subset(data.frame(matches=joined_hits), matches != "")
    if (print.all) {
        print(match_df)
    }
    return(match_df)
}
# Prints the match positions of rex_str in txt_vctr[ix] (txt_vctr is taken
# from the calling environment), followed by a substring of that element
# around each match start position.
#   rex_str: perl-compatible regular expression
#   ix:      index into txt_vctr
dsp_matches <- function(rex_str, ix) {
print(match_pos <- gregexpr(rex_str, txt_vctr[ix], perl=TRUE))
# Window runs from 99% of the match start position to 100 characters past it.
# NOTE(review): the start offset scales with the position instead of being a
#   fixed number of characters - confirm this is the intended context window.
print(str_sub(txt_vctr[ix], (match_pos[[1]] / 100) * 99 + 0,
(match_pos[[1]] / 100) * 100 + 100))
}
myapply_gsub <- function(...) {
    # Applies every pattern -> replacement pair of gsub_map_lst (names are the
    # patterns, values the replacements) to txt_vctr, in list order. Both
    # gsub_map_lst and txt_vctr are taken from the calling environment.
    #   ...: extra arguments forwarded to gsub()
    # Returns the transformed copy of txt_vctr (unchanged if the map has no
    # names).
    ptn_names <- names(gsub_map_lst)
    length_lst <- length(ptn_names)
    if (length_lst == 0) {
        return(txt_vctr)
    }
    for (ptn_ix in 1:length_lst) {
        # progress message on every 10th pattern
        if ((ptn_ix %% 10) == 0) {
            print(sprintf("running gsub for %02d (of %02d): #%s#...", ptn_ix,
                          length(names(gsub_map_lst)), names(gsub_map_lst)[ptn_ix]))
        }
        txt_vctr <- gsub(ptn_names[ptn_ix], gsub_map_lst[[ptn_ix]], txt_vctr, ...)
    }
    return(txt_vctr)
}
myapply_txtmap <- function(txt_vctr, ...) {
    # Applies every row of glb_txt_map_df (columns rex_str -> rpl_str) to
    # txt_vctr with gsub(), in row order.
    #   txt_vctr: character vector to transform
    #   ...:      extra arguments forwarded to gsub() (e.g. ignore.case)
    # Returns the transformed vector; an empty map returns txt_vctr unchanged.
    nrows <- nrow(glb_txt_map_df)
    # seq_len() so that a map with zero rows performs no iterations; 1:nrows
    # would iterate over c(1, 0), run gsub() with an NA pattern (turning every
    # element into NA) and then fail on the zero-length pattern of row 0.
    for (ptn_ix in seq_len(nrows)) {
        # progress message on every 10th pattern
        if ((ptn_ix %% 10) == 0)
            print(sprintf("running gsub for %02d (of %02d): #%s#...", ptn_ix,
                          nrows, glb_txt_map_df[ptn_ix, "rex_str"]))
        txt_vctr <- gsub(glb_txt_map_df[ptn_ix, "rex_str"],
                         glb_txt_map_df[ptn_ix, "rpl_str"],
                         txt_vctr, ...)
    }
    return(txt_vctr)
}
chk.equal <- function(bgn, end) {
    # Prints all.equal() of elements bgn..end of the "Headline" text in
    # sav_txt_lst versus glb_txt_lst (both taken from the calling environment).
    obs_ix <- bgn:end
    saved_txt   <- sav_txt_lst[["Headline"]][obs_ix]
    current_txt <- glb_txt_lst[["Headline"]][obs_ix]
    print(all.equal(saved_txt, current_txt))
}
dsp.equal <- function(bgn, end) {
    # Prints elements bgn..end of the "Headline" text, first from sav_txt_lst
    # and then from glb_txt_lst (both taken from the calling environment).
    obs_ix <- bgn:end
    print(sav_txt_lst[["Headline"]][obs_ix])
    print(glb_txt_lst[["Headline"]][obs_ix])
}
#sav_txt_lst <- glb_txt_lst; all.equal(sav_txt_lst, glb_txt_lst)
#all.equal(sav_txt_lst[["Headline"]][1:4200], glb_txt_lst[["Headline"]][1:4200])
#chk.equal( 1, 100)
#dsp.equal(86, 90)
# Load the text-munging map (a csv whose rex_str / rpl_str columns are used by
# myapply_txtmap) and apply it to every text variable. The result,
# glb_txt_lst, holds one cleaned character vector per entry of glb_txt_vars.
txt_map_filename <- paste0(glb_txt_munge_filenames_pfx, "map.csv")
if (!file.exists(txt_map_filename))
stop(txt_map_filename, " not found!")
# Lines starting with "#" in the map file are treated as comments
glb_txt_map_df <- read.csv(txt_map_filename, comment.char="#", strip.white=TRUE)
glb_txt_lst <- list();
print(sprintf("Building glb_txt_lst..."))
# Each foreach iteration evaluates to the value of its last statement (the
# mapped text vector); foreach collects these into a list in glb_txt_vars order
glb_txt_lst <- foreach(txt_var=glb_txt_vars) %dopar% {
# for (txt_var in glb_txt_vars) {
txt_vctr <- glb_allobs_df[, txt_var]
# myapply_txtmap shd be created as a tm_map::content_transformer ?
#print(glb_txt_map_df)
#txt_var=glb_txt_vars[3]; txt_vctr <- glb_txt_lst[[txt_var]]
#print(rex_str <- glb_txt_map_df[3, "rex_str"])
#print(rex_str <- glb_txt_map_df[glb_txt_map_df$rex_str == "\\bWall St\\.", "rex_str"])
#print(rex_str <- glb_txt_map_df[grepl("du Pont", glb_txt_map_df$rex_str), "rex_str"])
#print(rex_str <- glb_txt_map_df[glb_txt_map_df$rpl_str == "versus", "rex_str"])
#print(tmp_vctr <- grep(rex_str, txt_vctr, value=TRUE, ignore.case=FALSE))
#ret_lst <- regexec(rex_str, txt_vctr, ignore.case=FALSE); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
#gsub(rex_str, glb_txt_map_df[glb_txt_map_df$rex_str == rex_str, "rpl_str"], tmp_vctr, ignore.case=FALSE)
#grep("Hong Hong", txt_vctr, value=TRUE)
txt_vctr <- myapply_txtmap(txt_vctr, ignore.case=FALSE)
}
# foreach returns an unnamed list; name the entries after their text variable
names(glb_txt_lst) <- glb_txt_vars
# Diagnostic report: for each text variable, list the occurrences of "OK",
# "Ok" and "ok" that remain after the text map has been applied. The
# look-behind / look-ahead alternatives in each pattern exclude the listed
# surrounding letter combinations. For every pattern the frequency table is
# printed first, followed by the context of each match (dsp_matches).
# chk_pattern_freq / dsp_pattern / dsp_matches read the txt_vctr set here.
for (txt_var in glb_txt_vars) {
print(sprintf("Remaining OK in %s:", txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
print(chk_pattern_freq(rex_str <- "(?<!(BO|HO|LO))OK(?!(E\\!|ED|IE|IN|S ))",
ignore.case=FALSE))
match_df <- dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
# row names of match_df are the positions of the matching elements in txt_vctr
for (row in row.names(match_df))
dsp_matches(rex_str, ix=as.numeric(row))
print(chk_pattern_freq(rex_str <- "Ok(?!(a\\.|ay|in|ra|um))", ignore.case=FALSE))
match_df <- dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
for (row in row.names(match_df))
dsp_matches(rex_str, ix=as.numeric(row))
print(chk_pattern_freq(rex_str <- "(?<!( b| B| c| C| g| G| j| M| p| P| w| W| r| Z|\\(b|ar|bo|Bo|co|Co|Ew|gk|go|ho|ig|jo|kb|ke|Ke|ki|lo|Lo|mo|mt|no|No|po|ra|ro|sm|Sm|Sp|to|To))ok(?!(ay|bo|e |e\\)|e,|e\\.|eb|ed|el|en|er|es|ey|i |ie|in|it|ka|ke|ki|ly|on|oy|ra|st|u |uc|uy|yl|yo))",
ignore.case=FALSE))
match_df <- dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
for (row in row.names(match_df))
dsp_matches(rex_str, ix=as.numeric(row))
}
# txt_vctr <- glb_txt_lst[[glb_txt_vars[1]]]
# print(chk_pattern_freq(rex_str <- "(?<!( b| c| C| p|\\(b|bo|co|lo|Lo|Sp|to|To))ok(?!(ay|e |e\\)|e,|e\\.|ed|el|en|es|ey|ie|in|on|ra))", ignore.case=FALSE))
# print(chk_pattern_freq(rex_str <- "ok(?!(ay|el|on|ra))", ignore.case=FALSE))
# dsp_pattern(rex_str, ignore.case=FALSE, print.all=FALSE)
# dsp_matches(rex_str, ix=8)
# substr(txt_vctr[86], 5613, 5620)
# substr(glb_allobs_df[301, "review"], 550, 650)
#stop(here"); sav_txt_lst <- glb_txt_lst
# Diagnostic report: for each text variable, print the frequency of
#  - sequences of two or more "<upper-case letter>." groups, and
#  - single "<upper-case letters>." tokens that occur more than once.
for (txt_var in glb_txt_vars) {
print(sprintf("Remaining Acronyms in %s:", txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
print(chk_pattern_freq(rex_str <- "([[:upper:]]\\.( *)){2,}", ignore.case=FALSE))
# Check for names
print(subset(chk_pattern_freq(rex_str <- "(([[:upper:]]+)\\.( *)){1}",
ignore.case=FALSE),
.n > 1))
# dsp_pattern(rex_str="(OK\\.( *)){1}", ignore.case=FALSE)
# dsp_matches(rex_str="(OK\\.( *)){1}", ix=557)
#dsp_matches(rex_str="\\bR\\.I\\.P(\\.*)(\\B)", ix=461)
#dsp_matches(rex_str="\\bR\\.I\\.P(\\.*)", ix=461)
#print(str_sub(txt_vctr[676], 10100, 10200))
#print(str_sub(txt_vctr[74], 1, -1))
}
# Diagnostic report: two-word terms starting with a place-name prefix
# (Fort, Las, New, San, St., ...); only terms whose second word starts with an
# upper-case letter are printed, ordered by descending count.
for (txt_var in glb_txt_vars) {
re_str <- "\\b(Fort|Ft\\.|Hong|Las|Los|New|Puerto|Saint|San|St\\.)( |-)(\\w)+"
print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
print(orderBy(~ -.n +pattern, subset(chk_pattern_freq(re_str, ignore.case=FALSE),
grepl("( |-)[[:upper:]]", pattern))))
print(" consider cleaning if relevant to problem domain; geography name; .n > 1")
#grep("New G", txt_vctr, value=TRUE, ignore.case=FALSE)
#grep("St\\. Wins", txt_vctr, value=TRUE, ignore.case=FALSE)
}
#stop(here"); sav_txt_lst <- glb_txt_lst
# Diagnostic report: terms starting with a single-letter direction
# abbreviation (N, S, E, W, C) followed by a space or a period.
for (txt_var in glb_txt_vars) {
re_str <- "\\b(N|S|E|W|C)( |\\.)(\\w)+"
print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
# NOTE(review): grepl(".", pattern) is a regex that matches any character,
#   so this subset keeps every non-empty pattern - confirm whether a literal
#   period filter was intended.
print(orderBy(~ -.n +pattern, subset(chk_pattern_freq(re_str, ignore.case=FALSE),
grepl(".", pattern))))
#grep("N Weaver", txt_vctr, value=TRUE, ignore.case=FALSE)
}
# Diagnostic report: terms starting with a spelled-out direction word; printed
# only when at least one such term is found.
for (txt_var in glb_txt_vars) {
re_str <- "\\b(North|South|East|West|Central)( |\\.)(\\w)+"
print(sprintf("Remaining #%s# terms in %s: ", re_str, txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
if (nrow(filtered_df <- subset(chk_pattern_freq(re_str, ignore.case=FALSE),
grepl(".", pattern))) > 0)
print(orderBy(~ -.n +pattern, filtered_df))
#grep("Central (African|Bankers|Cast|Italy|Role|Spring)", txt_vctr, value=TRUE, ignore.case=FALSE)
#grep("East (Africa|Berlin|London|Poland|Rivals|Spring)", txt_vctr, value=TRUE, ignore.case=FALSE)
#grep("North (American|Korean|West)", txt_vctr, value=TRUE, ignore.case=FALSE)
#grep("South (Pacific|Street)", txt_vctr, value=TRUE, ignore.case=FALSE)
#grep("St\\. Martins", txt_vctr, value=TRUE, ignore.case=FALSE)
}
find_cmpnd_wrds <- function(txt_vctr) {
    # Reports the hyphenated (compound) terms found in txt_vctr that are not
    # already covered by the patterns in "<glb_txt_munge_filenames_pfx>compound.csv".
    #   txt_vctr: character vector with one element per row of glb_allobs_df
    # Prints the full term-frequency DocumentTermMatrix, the number of
    # remaining compound terms and the terms themselves (via myprint_df).
    # Stops if the compound filter file does not exist.
    txt_corpus <- Corpus(VectorSource(txt_vctr))
    txt_corpus <- tm_map(txt_corpus, content_transformer(tolower), lazy=TRUE)
    txt_corpus <- tm_map(txt_corpus, PlainTextDocument, lazy=TRUE)
    # `lazy` must be supplied only once: passing it twice is a runtime error
    # ("formal argument matched by multiple actual arguments")
    txt_corpus <- tm_map(txt_corpus, removePunctuation,
                         preserve_intra_word_dashes=TRUE, lazy=TRUE)
    full_Tf_DTM <- DocumentTermMatrix(txt_corpus,
                                      control=list(weighting=weightTf))
    print("   Full TermMatrix:"); print(full_Tf_DTM)
    full_Tf_mtrx <- as.matrix(full_Tf_DTM)
    rownames(full_Tf_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
    full_Tf_vctr <- colSums(full_Tf_mtrx)
    names(full_Tf_vctr) <- dimnames(full_Tf_DTM)[[2]]
    #grep("year", names(full_Tf_vctr), value=TRUE)
    #which.max(full_Tf_mtrx[, "yearlong"])
    full_Tf_df <- as.data.frame(full_Tf_vctr)
    names(full_Tf_df) <- "Tf.full"
    full_Tf_df$term <- rownames(full_Tf_df)
    #full_Tf_df$freq.full <- colSums(full_Tf_mtrx != 0)
    full_Tf_df <- orderBy(~ -Tf.full, full_Tf_df)
    # Rows are selected by name: the row names of full_Tf_df are the terms
    cmpnd_Tf_df <- full_Tf_df[grep("-", full_Tf_df$term, value=TRUE) ,]

    txt_compound_filename <- paste0(glb_txt_munge_filenames_pfx, "compound.csv")
    if (!file.exists(txt_compound_filename))
        stop(txt_compound_filename, " not found!")
    filter_df <- read.csv(txt_compound_filename, comment.char="#", strip.white=TRUE)
    cmpnd_Tf_df$filter <- FALSE
    # Flag terms matched by any filter pattern; terms already flagged are not
    # re-tested. seq_len() so that an empty filter file flags nothing
    # (1:nrow() would iterate over c(1, 0) and grepl() on an NA pattern).
    for (row_ix in seq_len(nrow(filter_df)))
        cmpnd_Tf_df[!cmpnd_Tf_df$filter, "filter"] <-
            grepl(filter_df[row_ix, "rex_str"],
                  cmpnd_Tf_df[!cmpnd_Tf_df$filter, "term"], ignore.case=TRUE)
    cmpnd_Tf_df <- subset(cmpnd_Tf_df, !filter)
    # Bug in tm_map(txt_corpus, removePunctuation, preserve_intra_word_dashes=TRUE) ???
    # "net-a-porter" gets converted to "net-aporter"
    #grep("net-a-porter", txt_vctr, ignore.case=TRUE, value=TRUE)
    #grep("maser-laser", txt_vctr, ignore.case=TRUE, value=TRUE)
    #txt_corpus[[which(grepl("net-a-porter", txt_vctr, ignore.case=TRUE))]]
    #grep("\\b(across|longer)-(\\w)", cmpnd_Tf_df$term, ignore.case=TRUE, value=TRUE)
    #grep("(\\w)-(affected|term)\\b", cmpnd_Tf_df$term, ignore.case=TRUE, value=TRUE)
    print(sprintf("nrow(cmpnd_Tf_df): %d", nrow(cmpnd_Tf_df)))
    myprint_df(cmpnd_Tf_df)
}
# Record the start of the compound-term reporting step
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "process.text_reporting_compound_terms"), major.inc=FALSE)
# The actual report (find_cmpnd_wrds) is commented out, so this loop only
# prints a header per text variable and leaves txt_vctr set to the last
# variable's text.
for (txt_var in glb_txt_vars) {
print(sprintf("Remaining compound terms in %s: ", txt_var))
txt_vctr <- glb_txt_lst[[txt_var]]
# find_cmpnd_wrds(txt_vctr)
#grep("thirty-five", txt_vctr, ignore.case=TRUE, value=TRUE)
#rex_str <- glb_txt_map_df[grepl("hirty", glb_txt_map_df$rex_str), "rex_str"]
}
# Record the start of the corpus-building step
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "build.corpus"), major.inc=TRUE)
# Summarises a DocumentTermMatrix into one row per term.
#   DTM: document-term matrix with one document per row of glb_allobs_df
# Returns a data frame ordered by descending TfIdf with columns:
#   TfIdf      column sum of the matrix for the term
#   term       the term itself
#   freq       number of documents with a non-zero entry for the term
#   pos        column position of the term in DTM
#   cor.y      correlation of the term's column with
#              as.numeric(glb_allobs_df[, glb_txt_cor_var])
#   cor.y.abs  absolute value of cor.y
#   TfIdf.<cls> column sum restricted to the documents whose glb_txt_cor_var
#              value is <cls>; one column per distinct value, including a
#              column for the documents where it is NA
get_DTM_terms <- function(DTM) {
TfIdf_mtrx <- as.matrix(DTM)
rownames(TfIdf_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
TfIdf_vctr <- colSums(TfIdf_mtrx)
names(TfIdf_vctr) <- dimnames(DTM)[[2]]
TfIdf_df <- as.data.frame(TfIdf_vctr)
names(TfIdf_df) <- "TfIdf"
TfIdf_df$term <- rownames(TfIdf_df)
TfIdf_df$freq <- colSums(TfIdf_mtrx != 0)
TfIdf_df$pos <- 1:nrow(TfIdf_df)
TfIdf_df$cor.y <- cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]),
use="pairwise.complete.obs")
TfIdf_df$cor.y.abs <- abs(TfIdf_df$cor.y)
for (cls in unique(glb_allobs_df[, glb_txt_cor_var])) {
# Multiplying by the 0/1 membership vector (recycled down the rows) zeroes the
# documents outside the class before summing per term
if (!is.na(cls))
TfIdf_df[, paste0("TfIdf.", as.character(cls))] <-
colSums(TfIdf_mtrx *
as.numeric(!is.na(glb_allobs_df[, glb_txt_cor_var]) &
(glb_allobs_df[, glb_txt_cor_var] == cls))) else
TfIdf_df[, paste0("TfIdf.", as.character(cls))] <-
colSums(TfIdf_mtrx *
as.numeric(is.na(glb_allobs_df[, glb_txt_cor_var])))
}
# Check all calls to get_DTM_terms to change returned order assumption
return(TfIdf_df <- orderBy(~ -TfIdf, TfIdf_df))
}
#plt_full_df <- get_DTM_terms(DTM=glb_full_DTM_lst[[txt_var]])
get_corpus_terms <- function(txt_corpus) {
    # Builds the TfIdf-weighted DocumentTermMatrix of txt_corpus and returns
    # its per-term summary as produced by get_DTM_terms().
    dtm_control <- list(weighting=weightTfIdf)
    corpus_DTM <- DocumentTermMatrix(txt_corpus, control=dtm_control)
    return(terms_df <- get_DTM_terms(corpus_DTM))
}
#stop(here")
# Build one tm corpus per text variable from the cleaned text in glb_txt_lst:
# convert to plain text documents, lower-case, remove punctuation, then remove
# the variable-specific stop words (glb_append_stop_words) plus the standard
# English stop words. Stemming is applied later, not here.
glb_corpus_lst <- list()
print(sprintf("Building glb_corpus_lst..."))
# Each foreach iteration evaluates to the value of its last statement - the
# removeWords assignment - so no statement may be added after it without
# changing what is collected
glb_corpus_lst <- foreach(txt_var=glb_txt_vars) %dopar% {
# for (txt_var in glb_txt_vars) {
txt_corpus <- Corpus(VectorSource(glb_txt_lst[[txt_var]]))
#tolower Not needed as of version 0.6.2 ?
txt_corpus <- tm_map(txt_corpus, PlainTextDocument, lazy=FALSE)
txt_corpus <- tm_map(txt_corpus, content_transformer(tolower), lazy=FALSE) #nuppr
# removePunctuation does not replace with whitespace. Use a custom transformer ???
txt_corpus <- tm_map(txt_corpus, removePunctuation, lazy=TRUE) #npnct<chr_ix>
# txt-corpus <- tm_map(txt_corpus, content_transformer(function(x, pattern) gsub(pattern, "", x))
txt_corpus <- tm_map(txt_corpus, removeWords,
c(glb_append_stop_words[[txt_var]],
stopwords("english")), lazy=TRUE) #nstopwrds
#print("StoppedWords:"); stopped_words_TfIdf_df <- inspect_terms(txt_corpus)
#stopped_words_TfIdf_df[grepl("cond", stopped_words_TfIdf_df$term, ignore.case=TRUE), ]
#txt_X_mtrx <- as.matrix(DocumentTermMatrix(txt_corpus, control=list(weighting=weightTfIdf)))
#which(txt_X_mtrx[, 211] > 0)
#glb_allobs_df[which(txt_X_mtrx[, 211] > 0), glb_txt_vars]
#txt_X_mtrx[2159, txt_X_mtrx[2159, ] > 0]
# txt_corpus <- tm_map(txt_corpus, stemDocument, "english", lazy=TRUE) #Done below
#txt_corpus <- tm_map(txt_corpus, content_transformer(stemDocument))
#print("StemmedWords:"); stemmed_words_TfIdf_df <- inspect_terms(txt_corpus)
#stemmed_words_TfIdf_df[grepl("cond", stemmed_words_TfIdf_df$term, ignore.case=TRUE), ]
#stm_X_mtrx <- as.matrix(DocumentTermMatrix(txt_corpus, control=list(weighting=weightTfIdf)))
#glb_allobs_df[which((stm_X_mtrx[, 180] > 0) | (stm_X_mtrx[, 181] > 0)), glb_txt_vars]
#glb_allobs_df[which((stm_X_mtrx[, 181] > 0)), glb_txt_vars]
# glb_corpus_lst[[txt_var]] <- txt_corpus
}
# foreach returns an unnamed list; name the entries after their text variable
names(glb_corpus_lst) <- glb_txt_vars
#stop(here")
# For every text variable: summarise the TfIdf-weighted terms of its corpus
# before stemming ("post.stop") and after stemming ("post.stem"), keep the
# term tables and matrices in the glb_post_* lists, and bind per-observation
# summary features to glb_allobs_df, prefixed with "<P>." where <P> is the
# upper-cased first letter of the text variable:
#   terms.n.post.stop / .stem          number of terms with TfIdf > 0
#   terms.n.post.stop.log / .stem.log  log(1 + count)
#   TfIdf.sum.post.stop / .stem        row sum of TfIdf
#   terms.n.stem.stop.Ratio, TfIdf.sum.stem.stop.Ratio  stem / stop ratios
# The raw counts are excluded from the feature set.
glb_post_stop_words_terms_df_lst <- list();
glb_post_stop_words_TfIdf_mtrx_lst <- list();
glb_post_stem_words_terms_df_lst <- list();
glb_post_stem_words_TfIdf_mtrx_lst <- list();
for (txt_var in glb_txt_vars) {
    print(sprintf("    Top_n stop TfIDf terms for %s:", txt_var))
    # This impacts stemming probably due to lazy parameter
    print(myprint_df(full_TfIdf_df <- get_corpus_terms(glb_corpus_lst[[txt_var]]),
                     glb_txt_top_n[[txt_var]]))
    glb_post_stop_words_terms_df_lst[[txt_var]] <- full_TfIdf_df
    TfIdf_stop_mtrx <- as.matrix(DocumentTermMatrix(glb_corpus_lst[[txt_var]],
                                        control=list(weighting=weightTfIdf)))
    rownames(TfIdf_stop_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
    glb_post_stop_words_TfIdf_mtrx_lst[[txt_var]] <- TfIdf_stop_mtrx

    # Columns 1 and 2 (id, response) are only carried along for the
    # correlation report below and are dropped before binding
    tmp_allobs_df <- glb_allobs_df[, c(glb_id_var, glb_rsp_var)]
    tmp_allobs_df$terms.n.post.stop <- rowSums(TfIdf_stop_mtrx > 0)
    tmp_allobs_df$terms.n.post.stop.log <- log(1 + tmp_allobs_df$terms.n.post.stop)
    tmp_allobs_df$TfIdf.sum.post.stop <- rowSums(TfIdf_stop_mtrx)

    print(sprintf("    Top_n stem TfIDf terms for %s:", txt_var))
    # The corpus stored in glb_corpus_lst is stemmed from here on
    glb_corpus_lst[[txt_var]] <- tm_map(glb_corpus_lst[[txt_var]], stemDocument,
                                        "english", lazy=TRUE) #Features ???
    print(myprint_df(full_TfIdf_df <- get_corpus_terms(glb_corpus_lst[[txt_var]]),
                     glb_txt_top_n[[txt_var]]))
    glb_post_stem_words_terms_df_lst[[txt_var]] <- full_TfIdf_df
    TfIdf_stem_mtrx <- as.matrix(DocumentTermMatrix(glb_corpus_lst[[txt_var]],
                                        control=list(weighting=weightTfIdf)))
    rownames(TfIdf_stem_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
    glb_post_stem_words_TfIdf_mtrx_lst[[txt_var]] <- TfIdf_stem_mtrx

    tmp_allobs_df$terms.n.post.stem <- rowSums(TfIdf_stem_mtrx > 0)
    tmp_allobs_df$terms.n.post.stem.log <- log(1 + tmp_allobs_df$terms.n.post.stem)
    tmp_allobs_df$TfIdf.sum.post.stem <- rowSums(TfIdf_stem_mtrx)

    # 0 / 0 (observation without any term) yields NaN; treat it as "no change"
    tmp_allobs_df$terms.n.stem.stop.Ratio <-
        1.0 * tmp_allobs_df$terms.n.post.stem / tmp_allobs_df$terms.n.post.stop
    tmp_allobs_df[is.nan(tmp_allobs_df$terms.n.stem.stop.Ratio),
                  "terms.n.stem.stop.Ratio"] <- 1.0

    tmp_allobs_df$TfIdf.sum.stem.stop.Ratio <-
        1.0 * tmp_allobs_df$TfIdf.sum.post.stem / tmp_allobs_df$TfIdf.sum.post.stop
    tmp_allobs_df[is.nan(tmp_allobs_df$TfIdf.sum.stem.stop.Ratio),
                  "TfIdf.sum.stem.stop.Ratio"] <- 1.0

    # Correlation of each summary feature with the response, on the
    # observations that have a response
    tmp_trnobs_df <- tmp_allobs_df[!is.na(tmp_allobs_df[, glb_rsp_var]), ]
    print(cor(as.matrix(tmp_trnobs_df[, -c(1, 2)]),
              as.numeric(tmp_trnobs_df[, glb_rsp_var])))

    txt_var_pfx <- toupper(substr(txt_var, 1, 1))
    tmp_allobs_df <- tmp_allobs_df[, -c(1, 2)]
    names(tmp_allobs_df) <- paste(paste0(txt_var_pfx, "."), names(tmp_allobs_df),
                                  sep="")
    glb_allobs_df <- cbind(glb_allobs_df, tmp_allobs_df)
    # sep="." so that the excluded names equal the bound column names
    # ("<P>.terms.n.post.stop"); paste()'s default separator is a space, which
    # produced names that match no column, so nothing was actually excluded
    glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features,
        paste(txt_var_pfx, c("terms.n.post.stop", "terms.n.post.stem"), sep="."))
}
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "extract.DTM"), major.inc=TRUE)
#stop(here")
# Per text variable, build the TfIdf-weighted DocumentTermMatrix of its corpus
# (full) and a reduced version (sprs) obtained with removeSparseTerms() at the
# variable's threshold in glb_sprs_thresholds.
glb_full_DTM_lst <- list(); glb_sprs_DTM_lst <- list();
for (txt_var in glb_txt_vars) {
print(sprintf("Extracting TfIDf terms for %s...", txt_var))
txt_corpus <- glb_corpus_lst[[txt_var]]
# full_Tf_DTM <- DocumentTermMatrix(txt_corpus,
# control=list(weighting=weightTf))
full_TfIdf_DTM <- DocumentTermMatrix(txt_corpus,
control=list(weighting=weightTfIdf))
sprs_TfIdf_DTM <- removeSparseTerms(full_TfIdf_DTM,
glb_sprs_thresholds[txt_var])
# glb_full_DTM_lst[[txt_var]] <- full_Tf_DTM
# glb_sprs_DTM_lst[[txt_var]] <- sprs_Tf_DTM
glb_full_DTM_lst[[txt_var]] <- full_TfIdf_DTM
glb_sprs_DTM_lst[[txt_var]] <- sprs_TfIdf_DTM
}
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
paste0("extract.features_", "report.DTM"), major.inc=TRUE)
# Reporting only: for each text variable, compare the terms of the full and
# the sparse DocumentTermMatrix and plot
#  - TfIdf vs. document frequency, labelling the terms that are missing from
#    the sparse matrix,
#  - the empirical CDF of every term statistic,
#  - horizontal bars for the terms kept in the sparse matrix and for the top
#    10 rows of the dropped terms.
require(reshape2)
for (txt_var in glb_txt_vars) {
print(sprintf("Reporting TfIDf terms for %s...", txt_var))
full_TfIdf_DTM <- glb_full_DTM_lst[[txt_var]]
sprs_TfIdf_DTM <- glb_sprs_DTM_lst[[txt_var]]
print(" Full TermMatrix:"); print(full_TfIdf_DTM)
full_TfIdf_df <- get_DTM_terms(full_TfIdf_DTM)
# Keep term, TfIdf, freq, pos (in that order) and suffix all but term with ".full"
full_TfIdf_df <- full_TfIdf_df[, c(2, 1, 3, 4)]
col_names <- names(full_TfIdf_df)
col_names[2:length(col_names)] <-
paste(col_names[2:length(col_names)], ".full", sep="")
names(full_TfIdf_df) <- col_names
# full_TfIdf_mtrx <- as.matrix(full_TfIdf_DTM)
# rownames(full_TfIdf_mtrx) <- rownames(glb_allobs_df) # print undreadable otherwise
# full_TfIdf_vctr <- colSums(full_TfIdf_mtrx)
# names(full_TfIdf_vctr) <- dimnames(full_TfIdf_DTM)[[2]]
# full_TfIdf_df <- as.data.frame(full_TfIdf_vctr)
# names(full_TfIdf_df) <- "TfIdf.full"
# full_TfIdf_df$term <- rownames(full_TfIdf_df)
# full_TfIdf_df$freq.full <- colSums(full_TfIdf_mtrx != 0)
# full_TfIdf_df <- orderBy(~ -TfIdf.full, full_TfIdf_df)
print(" Sparse TermMatrix:"); print(sprs_TfIdf_DTM)
sprs_TfIdf_df <- get_DTM_terms(sprs_TfIdf_DTM)
# Same column selection for the sparse matrix, suffixed with ".sprs"
sprs_TfIdf_df <- sprs_TfIdf_df[, c(2, 1, 3, 4)]
col_names <- names(sprs_TfIdf_df)
col_names[2:length(col_names)] <-
paste(col_names[2:length(col_names)], ".sprs", sep="")
names(sprs_TfIdf_df) <- col_names
# sprs_TfIdf_vctr <- colSums(as.matrix(sprs_TfIdf_DTM))
# names(sprs_TfIdf_vctr) <- dimnames(sprs_TfIdf_DTM)[[2]]
# sprs_TfIdf_df <- as.data.frame(sprs_TfIdf_vctr)
# names(sprs_TfIdf_df) <- "TfIdf.sprs"
# sprs_TfIdf_df$term <- rownames(sprs_TfIdf_df)
# sprs_TfIdf_df$freq.sprs <- colSums(as.matrix(sprs_TfIdf_DTM) != 0)
# sprs_TfIdf_df <- orderBy(~ -TfIdf.sprs, sprs_TfIdf_df)
# Left join on the common column (term): the .sprs columns are NA for terms
# that were dropped from the sparse matrix
terms_TfIdf_df <- merge(full_TfIdf_df, sprs_TfIdf_df, all.x=TRUE)
terms_TfIdf_df$in.sprs <- !is.na(terms_TfIdf_df$freq.sprs)
# Plot only terms whose full TfIdf is at least the smallest sparse TfIdf
plt_TfIdf_df <- subset(terms_TfIdf_df,
TfIdf.full >= min(terms_TfIdf_df$TfIdf.sprs, na.rm=TRUE))
# Label only the terms that are absent from the sparse matrix
plt_TfIdf_df$label <- ""
plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "label"] <-
plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "term"]
# glb_important_terms[[txt_var]] <- union(glb_important_terms[[txt_var]],
# plt_TfIdf_df[is.na(plt_TfIdf_df$TfIdf.sprs), "term"])
print(myplot_scatter(plt_TfIdf_df, "freq.full", "TfIdf.full",
colorcol_name="in.sprs") +
geom_text(aes(label=label), color="Black", size=3.5))
melt_TfIdf_df <- orderBy(~ -value, melt(terms_TfIdf_df, id.var="term"))
print(ggplot(melt_TfIdf_df, aes(value, color=variable)) + stat_ecdf() +
geom_hline(yintercept=glb_sprs_thresholds[txt_var],
linetype = "dotted"))
melt_TfIdf_df <- orderBy(~ -value,
melt(subset(terms_TfIdf_df, !is.na(TfIdf.sprs)), id.var="term"))
print(myplot_hbar(melt_TfIdf_df, "term", "value",
colorcol_name="variable"))
melt_TfIdf_df <- orderBy(~ -value,
melt(subset(terms_TfIdf_df, is.na(TfIdf.sprs)), id.var="term"))
print(myplot_hbar(head(melt_TfIdf_df, 10), "term", "value",
colorcol_name="variable"))
}
# sav_full_DTM_lst <- glb_full_DTM_lst
# sav_sprs_DTM_lst <- glb_sprs_DTM_lst
# print(identical(sav_glb_corpus_lst, glb_corpus_lst))
# print(all.equal(length(sav_glb_corpus_lst), length(glb_corpus_lst)))
# print(all.equal(names(sav_glb_corpus_lst), names(glb_corpus_lst)))
# print(all.equal(sav_glb_corpus_lst[["Headline"]], glb_corpus_lst[["Headline"]]))
# print(identical(sav_full_DTM_lst, glb_full_DTM_lst))
# print(identical(sav_sprs_DTM_lst, glb_sprs_DTM_lst))
# Drop the DTM-report scratch objects. full_TfIdf_mtrx is no longer created
# (its assignment is commented out) and the others only exist when at least
# one text variable was reported, so remove just the ones that exist instead
# of triggering "object ... not found" warnings.
rm(list=intersect(c("full_TfIdf_mtrx", "full_TfIdf_df", "melt_TfIdf_df",
                    "terms_TfIdf_df"), ls()))
# Create txt features
# The columns derived from a text variable are prefixed with the upper-cased
# first letter of its name, so with several text variables those letters must
# all be different.
if (length(glb_txt_vars) > 1) {
    pfxs <- sapply(glb_txt_vars,
                   function(txt) toupper(substr(txt, 1, 1)))
    if (length(unique(pfxs)) < length(glb_txt_vars))
        stop("Prefixes for corpus freq terms not unique: ", pfxs)
}
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
            paste0("extract.features_", "bind.DTM"),
                                     major.inc=TRUE)

#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
require(tidyr)

# For every text variable: plot TfIdf vs. correlation of its terms, pick the
# terms to keep according to glb_txt_filter_terms (plus terms associated with
# them), and append the matching "<P>.T.<term>" columns to glb_allobs_df.
for (txt_var in glb_txt_vars) {
    print(sprintf("Binding DTM for %s...", txt_var))
    txt_var_pfx <- toupper(substr(txt_var, 1, 1))

    # Dense copy of the full DTM with columns renamed to "<P>.T.<term>"
    txt_full_X_df <- as.data.frame(as.matrix(glb_full_DTM_lst[[txt_var]]))
    terms_full_df <- get_DTM_terms(glb_full_DTM_lst[[txt_var]])
    colnames(txt_full_X_df) <- paste(txt_var_pfx, ".T.",
                                    make.names(colnames(txt_full_X_df)), sep="")
    rownames(txt_full_X_df) <- rownames(glb_allobs_df) # warning otherwise

    # Top-n terms by TfIdf and by absolute correlation with the response.
    # head() is used instead of [1:n] so that a vocabulary smaller than n does
    # not pad the result with NA terms, and n == 0 selects nothing (1:0 would
    # have selected the first term).
    top_n <- glb_txt_top_n[[txt_var]]
    top_val_terms <- head(orderBy(~-TfIdf, terms_full_df)$term, top_n)
    top_cor_terms <- head(orderBy(~-cor.y.abs, terms_full_df)$term, top_n)

    # Long-format frame for plotting: one row per (term, TfIdf domain)
    plt_full_df <- terms_full_df
    names(plt_full_df)[grepl("TfIdf$", names(plt_full_df))] <- "TfIdf.all"
#     gather(plt_full_df[1:5, ], domain, TfIdf, -matches("!(TfIdf)"))
#     gather(plt_full_df[1:5, grepl("TfIdf", names(plt_full_df))], domain, TfIdf)
#     gather(plt_full_df[1:5, ], domain, TfIdf,
#            -names(plt_full_df)[!grepl("TfIdf", names(plt_full_df))])
    plt_full_df <- gather(plt_full_df, domain, TfIdf,
                          -c(term, freq, pos, cor.y, cor.y.abs))

    # Only the top-n terms (by either criterion) get a text label in the plot
    plt_full_df$label <- NA
    plt_full_df[plt_full_df$term %in% top_val_terms, "label"] <-
        plt_full_df[plt_full_df$term %in% top_val_terms, "term"]
    plt_full_df[plt_full_df$term %in% top_cor_terms, "label"] <-
        plt_full_df[plt_full_df$term %in% top_cor_terms, "term"]
    print(ggplot(plt_full_df, aes(x=TfIdf, y=cor.y)) + facet_wrap(~ domain) +
              geom_point(aes(size=freq), color="grey") +
              geom_jitter() +
              geom_text(aes(label=label), color="NavyBlue", size=3.5))

    # Choose the base set of terms to turn into features
    if (glb_txt_filter_terms == "sparse") {
        # Only the column names of the sparse DTM are needed here, so the dense
        # conversion is skipped.
        # NOTE(review): make.names() may alter a term (e.g. reserved words or
        # terms starting with a digit); such a term would then not match
        # terms_full_df$term below -- confirm against get_DTM_terms().
        select_terms <- make.names(colnames(glb_sprs_DTM_lst[[txt_var]]))
#         colnames(txt_X_df) <- paste(txt_var_pfx, ".T.",
#                                     make.names(colnames(txt_X_df)), sep="")
#         rownames(txt_X_df) <- rownames(glb_allobs_df) # warning otherwise
    } else if (glb_txt_filter_terms == "top.val") {
        select_terms <- top_val_terms
#         txt_X_df <- txt_full_X_df[, subset(terms_full_df, term %in% select_terms)$pos,
#                                   FALSE]
    } else if (glb_txt_filter_terms == "top.cor") {
        select_terms <- top_cor_terms
#         txt_X_df <- txt_full_X_df[, subset(terms_full_df, term %in% select_terms)$pos,
#                                   FALSE]
    } else stop(
        "glb_txt_filter_terms should be one of c('sparse', 'top.val', 'top.cor') vs. '",
                glb_txt_filter_terms, "'")

    # Add every term that findAssocs() reports for a selected term at the 0.2
    # correlation limit
    assoc_terms_lst <- findAssocs(glb_full_DTM_lst[[txt_var]], select_terms, c(0.2))
    assoc_terms <- c(NULL)
    for (term in names(assoc_terms_lst))
        if (length(assoc_terms_lst[[term]]) > 0)
            assoc_terms <- union(assoc_terms, names(assoc_terms_lst[[term]]))

    # terms_full_df$pos is used as the column index into the full DTM frame
    txt_X_df <- txt_full_X_df[,
        subset(terms_full_df, term %in% c(select_terms, assoc_terms))$pos,
                              FALSE]
    glb_allobs_df <- cbind(glb_allobs_df, txt_X_df) # TfIdf is normalized
    #glb_allobs_df <- cbind(glb_allobs_df, log_X_df) # if using non-normalized metrics
}
#identical(chk_entity_df, glb_allobs_df)
#chk_entity_df <- glb_allobs_df
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df,
            paste0("extract.features_", "bind.DXM"),
                                     major.inc=TRUE)

#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df

# Regular-expression patterns, one per punctuation feature. The position in
# this vector becomes the two-digit index in "<P>.npnct<ix>.log".
# The caret is escaped: a bare "^" is the start-of-string anchor, not a
# literal caret character.
glb_punct_vctr <- c("!", "\"", "#", "\\$", "%", "&", "'",
                    "\\(|\\)",# "\\(", "\\)",
                    "\\*", "\\+", ",", "-", "\\.", "/", ":", ";",
                    "<|>", # "<",
                    "=",
                    # ">",
                    "\\?", "@", "\\[", "\\\\", "\\]", "\\^", "_", "`",
                    "\\{", "\\|", "\\}", "~")

# Seed frame carrying only the id and .rnorm columns; each foreach iteration
# adds its feature columns to a copy of it, strips the two seed columns and
# returns the remainder, which .combine=cbind joins across text variables.
txt_X_df <- glb_allobs_df[, c(glb_id_var, ".rnorm"), FALSE]
txt_X_df <- foreach(txt_var=glb_txt_vars, .combine=cbind) %dopar% {
#for (txt_var in glb_txt_vars) {
    print(sprintf("Binding DXM for %s...", txt_var))
    txt_var_pfx <- toupper(substr(txt_var, 1, 1))
    # Builds "<P><suffix>" column names for this text variable
    col_nm <- function(sfx) paste0(txt_var_pfx, sfx)

    raw_txt <- glb_allobs_df[, txt_var]
    txt_full_DTM_mtrx <- as.matrix(glb_full_DTM_lst[[txt_var]])
    rownames(txt_full_DTM_mtrx) <- rownames(glb_allobs_df) # print unreadable otherwise
    #print(txt_full_DTM_mtrx[txt_full_DTM_mtrx[, "ebola"] != 0, "ebola"])

    # Create <txt_var>.T.<term> for glb_important_terms
    for (term in glb_important_terms[[txt_var]])
        txt_X_df[, col_nm(paste0(".T.", make.names(term)))] <-
            txt_full_DTM_mtrx[, term]

    # Create <txt_var>.nwrds.log & .nwrds.unq.log
    txt_X_df[, col_nm(".nwrds.log")] <-
        log(1 + mycount_pattern_occ("\\w+", glb_txt_lst[[txt_var]]))
    txt_X_df[, col_nm(".nwrds.unq.log")] <-
        log(1 + rowSums(txt_full_DTM_mtrx != 0))
    txt_X_df[, col_nm(".sum.TfIdf")] <-
        rowSums(txt_full_DTM_mtrx)

    # Ratio of summed DTM weights to the word count recovered from
    # .nwrds.log; 0/0 (NaN) is mapped to 0
    ratio_sum_nwrds <- txt_X_df[, col_nm(".sum.TfIdf")] /
        (exp(txt_X_df[, col_nm(".nwrds.log")]) - 1)
    ratio_sum_nwrds[is.nan(ratio_sum_nwrds)] <- 0
    txt_X_df[, col_nm(".ratio.sum.TfIdf.nwrds")] <- ratio_sum_nwrds

    # Create <txt_var>.nchrs.log
    txt_X_df[, col_nm(".nchrs.log")] <-
        log(1 + mycount_pattern_occ(".", raw_txt))
    txt_X_df[, col_nm(".nuppr.log")] <-
        log(1 + mycount_pattern_occ("[[:upper:]]", raw_txt))
    txt_X_df[, col_nm(".ndgts.log")] <-
        log(1 + mycount_pattern_occ("[[:digit:]]", raw_txt))

    # Create <txt_var>.npnct?.log
    # would this be faster if it's iterated over each row instead of
    #   each created column ???
    for (punct_ix in seq_along(glb_punct_vctr)) {
#         smp0 <- " "
#         smp1 <- "! \" # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~"
#         smp2 <- paste(smp1, smp1, sep=" ")
#         print(sprintf("Testing %s pattern:", glb_punct_vctr[punct_ix]))
#         results <- mycount_pattern_occ(glb_punct_vctr[punct_ix], c(smp0, smp1, smp2))
#         names(results) <- NULL; print(results)
        txt_X_df[, col_nm(paste0(".npnct", sprintf("%02d", punct_ix), ".log"))] <-
            log(1 + mycount_pattern_occ(glb_punct_vctr[punct_ix], raw_txt))
    }
#     print(head(glb_allobs_df[glb_allobs_df[, "A.npnct23.log"] > 0,
#                                     c("UniqueID", "Popular", "Abstract", "A.npnct23.log")]))

    # Create <txt_var>.nstopwrds.log & <txt_var>ratio.nstopwrds.nwrds
    stop_words_rex_str <- paste0("\\b(", paste0(c(glb_append_stop_words[[txt_var]],
                                       stopwords("english")), collapse="|"),
                                 ")\\b")
    txt_X_df[, col_nm(".nstopwrds.log")] <-
        log(1 + mycount_pattern_occ(stop_words_rex_str, glb_txt_lst[[txt_var]]))
    # exp(log(1 + a) - log(1 + b)) == (1 + a) / (1 + b)
    txt_X_df[, col_nm(".ratio.nstopwrds.nwrds")] <-
        exp(txt_X_df[, col_nm(".nstopwrds.log")] -
            txt_X_df[, col_nm(".nwrds.log")])

    # Create <txt_var>.P.http
    txt_X_df[, col_nm(".P.http")] <-
        as.integer(0 + mycount_pattern_occ("http", raw_txt))

    # Create <txt_var>.P.mini & air
    # "mini(?!m)" skips "minim..."; "(?<![fhp])air" skips "fair"/"hair"/"pair"
    txt_X_df[, col_nm(".P.mini")] <-
        as.integer(0 + mycount_pattern_occ("mini(?!m)", raw_txt, perl=TRUE))
    txt_X_df[, col_nm(".P.air")] <-
        as.integer(0 + mycount_pattern_occ("(?<![fhp])air", raw_txt, perl=TRUE))

    # Colour keyword counts
    txt_X_df[, col_nm(".P.black")] <-
        as.integer(0 + mycount_pattern_occ("black", raw_txt, perl=TRUE))
    txt_X_df[, col_nm(".P.white")] <-
        as.integer(0 + mycount_pattern_occ("white", raw_txt, perl=TRUE))
    txt_X_df[, col_nm(".P.gold")] <-
        as.integer(0 + mycount_pattern_occ("gold", raw_txt, perl=TRUE))
    txt_X_df[, col_nm(".P.spacegray")] <-
        as.integer(0 + mycount_pattern_occ("spacegray", raw_txt, perl=TRUE))

    # Return only the new feature columns. The seed columns are dropped by
    # exact name; a substring grep on glb_id_var could also remove feature
    # columns whose names merely contain the id variable's name.
    txt_X_df <- txt_X_df[, setdiff(names(txt_X_df), c(glb_id_var, ".rnorm")), FALSE]
    #glb_allobs_df <- cbind(glb_allobs_df, txt_X_df)
    txt_X_df
}
glb_allobs_df <- cbind(glb_allobs_df, txt_X_df)
#myplot_box(glb_allobs_df, "A.sum.TfIdf", glb_rsp_var)
# if (sum(is.na(glb_allobs_df$D.P.http)) > 0)
# stop("Why is this happening ?")
# Generate summaries
# print(summary(glb_allobs_df))
# print(sapply(names(glb_allobs_df), function(col) sum(is.na(glb_allobs_df[, col]))))
# print(summary(glb_trnobs_df))
# print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
# print(summary(glb_newobs_df))
# print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))
# The raw text columns themselves are not to be used as model features
glb_exclude_vars_as_features <- union(glb_exclude_vars_as_features,
                                      glb_txt_vars)

# Remove the temporary feature frames. Only objects that actually exist are
# removed, so a frame that was never created does not raise an
# "object not found" warning.
rm(list=intersect(c("log_X_df", "txt_X_df"), ls()))
}
## Loading required package: stringr
## Loading required package: tm
## Loading required package: NLP
##
## Attaching package: 'NLP'
##
## The following object is masked from 'package:ggplot2':
##
## annotate
## label step_major step_minor bgn end
## 2 extract.features_factorize.str.vars 2 0 17.889 18.173
## 3 extract.features_process.text 3 0 18.174 NA
## elapsed
## 2 0.284
## 3 NA
## [1] "Building glb_txt_lst..."
## [1] "running gsub for 10 (of 178): #\\bCentral African Republic\\b#..."
## [1] "running gsub for 20 (of 178): #\\bAlejandro G\\. Iñárritu#..."
## [1] "running gsub for 30 (of 178): #\\bC\\.A\\.A\\.#..."
## [1] "running gsub for 40 (of 178): #\\bCV\\.#..."
## [1] "running gsub for 50 (of 178): #\\bE\\.P\\.A\\.#..."
## [1] "running gsub for 60 (of 178): #\\bG\\.I\\. Joe#..."
## [1] "running gsub for 70 (of 178): #\\bISIS\\.#..."
## [1] "running gsub for 80 (of 178): #\\bJ\\.K\\. Simmons#..."
## [1] "running gsub for 90 (of 178): #\\bM\\. Henri Pol#..."
## [1] "running gsub for 100 (of 178): #\\bN\\.Y\\.S\\.E\\.#..."
## [1] "running gsub for 110 (of 178): #\\bR\\.B\\.S\\.#..."
## [1] "running gsub for 120 (of 178): #\\bSteven A\\. Cohen#..."
## [1] "running gsub for 130 (of 178): #\\bV\\.A\\.#..."
## [1] "running gsub for 140 (of 178): #\\bWall Street#..."
## [1] "running gsub for 150 (of 178): #\\bSaint( |-)((Laurent|Lucia)\\b)+#..."
## [1] "running gsub for 160 (of 178): #\\bSouth( |\\\\.)(America|American|Africa|African|Carolina|Dakota|Korea|Korean|Sudan)\\b#..."
## [1] "running gsub for 170 (of 178): #(\\w)-a-year#..."
## [1] "Remaining OK in descr.my:"
## pattern .n
## 1 OK 6
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN DEVICE: Problem with Apple ID"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN: Device has at least one or more problems: \nFor Parts or Repair"
## [[1]]
## [1] 3
## attr(,"match.length")
## [1] 2
## attr(,"useBytes")
## [1] TRUE
## attr(,"capture.start")
##
## [1,] 0 0
## attr(,"capture.length")
##
## [1,] 0 0
## attr(,"capture.names")
## [1] "" ""
##
## [1] "ROKEN SCREEN"
## [1] pattern .n
## <0 rows> (or 0-length row.names)
## [1] pattern .n
## <0 rows> (or 0-length row.names)
## [1] "Remaining Acronyms in descr.my:"
## [1] pattern .n
## <0 rows> (or 0-length row.names)
## pattern .n
## 1 CONDITION. 8
## 2 ONLY. 6
## 3 GB. 4
## 4 BOX. 2
## 5 CORNER. 2
## 6 ESN. 2
## 7 GOOD. 2
## 8 ICLOUD. 2
## 9 IPADS. 2
## 10 LOCKED. 2
## 11 LOCKS. 2
## 12 ONLY. 2
## 13 SCRATCHES. 2
## 14 TEARS. 2
## 15 USE. 2
## [1] "Remaining #\\b(Fort|Ft\\.|Hong|Las|Los|New|Puerto|Saint|San|St\\.)( |-)(\\w)+# terms in descr.my: "
## pattern .n
## 2 New Open 3
## 4 New Condition 2
## 7 New Digitizer 1
## 8 New Opened 1
## 9 New Scratch 1
## 10 New Screen 1
## [1] " consider cleaning if relevant to problem domain; geography name; .n > 1"
## [1] "Remaining #\\b(N|S|E|W|C)( |\\.)(\\w)+# terms in descr.my: "
## pattern .n
## 1 C Stock 3
## 2 W blue 1
## [1] "Remaining #\\b(North|South|East|West|Central)( |\\.)(\\w)+# terms in descr.my: "
## label step_major
## 3 extract.features_process.text 3
## 4 extract.features_process.text_reporting_compound_terms 3
## step_minor bgn end elapsed
## 3 0 18.174 19.789 1.615
## 4 1 19.789 NA NA
## [1] "Remaining compound terms in descr.my: "
## label step_major
## 4 extract.features_process.text_reporting_compound_terms 3
## 5 extract.features_build.corpus 4
## step_minor bgn end elapsed
## 4 1 19.789 19.794 0.005
## 5 0 19.794 NA NA
## [1] "Building glb_corpus_lst..."
## [1] " Top_n stop TfIDf terms for descr.my:"
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] "Rows: 899; Cols: 9"
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## condition 207.6197 condition 498 165 -0.0426588315 0.0426588315 82.44452
## new 124.1683 new 156 523 -0.0372353149 0.0372353149 50.77429
## used 121.8672 used 240 855 0.0146439599 0.0146439599 40.84209
## good 120.2664 good 197 342 -0.0002812515 0.0002812515 44.51689
## scratches 112.5796 scratches 254 687 -0.0061691062 0.0061691062 44.23566
## screen 104.9197 screen 210 689 0.0230340341 0.0230340341 36.36712
## TfIdf.Y TfIdf.NA
## condition 56.11620 69.05897
## new 30.11628 43.27771
## used 39.75401 41.27105
## good 38.21866 37.53088
## scratches 36.53015 31.81375
## screen 37.40353 31.14910
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## correctly 5.0298080 correctly 5 186 -0.03730252 0.03730252 3.0178848
## headphone 2.0174744 headphone 2 364 -0.02152502 0.02152502 0.8646319
## guarantee 1.6250832 guarantee 1 348 -0.02152502 0.02152502 1.6250832
## real 1.2639536 real 1 638 -0.02152502 0.02152502 1.2639536
## dont 1.0341439 dont 1 248 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 347 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## correctly 0.0000000 2.011923
## headphone 0.0000000 1.152843
## guarantee 0.0000000 0.000000
## real 0.0000000 0.000000
## dont 1.0341439 0.000000
## grey 0.7583722 0.000000
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## red 0.8125416 red 1 648 NA NA 0.0000000
## version 0.8125416 version 1 862 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor 1 36 0.02500407 0.02500407 0.0000000
## divider 0.7583722 divider 1 243 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 347 0.02500407 0.02500407 0.0000000
## hdmi 0.7583722 hdmi 1 363 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## red 0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divider 0.7583722 0.0000000
## grey 0.7583722 0.0000000
## hdmi 0.7583722 0.0000000
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## red 0.8125416 red 1 648 NA NA 0.0000000
## version 0.8125416 version 1 862 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor 1 36 0.02500407 0.02500407 0.0000000
## divider 0.7583722 divider 1 243 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 347 0.02500407 0.02500407 0.0000000
## hdmi 0.7583722 hdmi 1 363 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## red 0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divider 0.7583722 0.0000000
## grey 0.7583722 0.0000000
## hdmi 0.7583722 0.0000000
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## [1] " Top_n stem TfIDf terms for descr.my:"
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] "Rows: 747; Cols: 9"
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## condit 207.7156 condit 499 137 -0.0418798096 0.0418798096 82.38883
## use 144.7700 use 291 709 0.0103720246 0.0103720246 51.46753
## scratch 126.4831 scratch 286 565 -0.0088060862 0.0088060862 49.35848
## new 124.1683 new 156 429 -0.0372353149 0.0372353149 50.77429
## good 120.3335 good 197 281 -0.0004368629 0.0004368629 44.58392
## screen 105.7897 screen 213 566 0.0232373651 0.0232373651 36.89203
## TfIdf.Y TfIdf.NA
## condit 56.35056 68.97623
## use 47.66515 45.63736
## scratch 40.20165 36.92302
## new 30.11628 43.27771
## good 38.21866 37.53088
## screen 37.92265 30.97500
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## see 47.840365 see 53 573 0.004143357 0.004143357 17.631219
## small 31.644189 small 46 609 -0.001073854 0.001073854 11.168239
## upgrad 7.864791 upgrad 5 705 0.035370611 0.035370611 0.000000
## lighten 2.240183 lighten 2 362 -0.030076173 0.030076173 2.240183
## logic 1.625083 logic 1 374 0.025004068 0.025004068 0.000000
## discolor 1.421948 discolor 1 190 -0.021525023 0.021525023 1.421948
## TfIdf.Y TfIdf.NA
## see 16.501545 13.707601
## small 9.437755 11.038194
## upgrad 3.017885 4.846906
## lighten 0.000000 0.000000
## logic 1.625083 0.000000
## discolor 0.000000 0.000000
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## red 0.8125416 red 1 532 NA NA 0.0000000
## version 0.8125416 version 1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor 1 31 0.02500407 0.02500407 0.0000000
## divid 0.7583722 divid 1 194 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 286 0.02500407 0.02500407 0.0000000
## hdmi 0.7583722 hdmi 1 297 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## red 0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid 0.7583722 0.0000000
## grey 0.7583722 0.0000000
## hdmi 0.7583722 0.0000000
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## red 0.8125416 red 1 532 NA NA 0.0000000
## version 0.8125416 version 1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor 1 31 0.02500407 0.02500407 0.0000000
## divid 0.7583722 divid 1 194 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 286 0.02500407 0.02500407 0.0000000
## hdmi 0.7583722 hdmi 1 297 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## red 0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid 0.7583722 0.0000000
## grey 0.7583722 0.0000000
## hdmi 0.7583722 0.0000000
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## [,1]
## terms.n.post.stop -0.080072993
## terms.n.post.stop.log -0.063865173
## TfIdf.sum.post.stop -0.030336619
## terms.n.post.stem -0.079867739
## terms.n.post.stem.log -0.063843117
## TfIdf.sum.post.stem -0.032374274
## terms.n.stem.stop.Ratio 0.017579091
## TfIdf.sum.stem.stop.Ratio -0.001456838
## label step_major step_minor bgn end
## 5 extract.features_build.corpus 4 0 19.794 30.382
## 6 extract.features_extract.DTM 5 0 30.382 NA
## elapsed
## 5 10.588
## 6 NA
## [1] "Extracting TfIDf terms for descr.my..."
## Warning in weighting(x): empty document(s): character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) character(0) character(0) character(0) character(0)
## character(0) charact
## label step_major step_minor bgn end elapsed
## 6 extract.features_extract.DTM 5 0 30.382 31.608 1.226
## 7 extract.features_report.DTM 6 0 31.609 NA NA
## [1] "Reporting TfIDf terms for descr.my..."
## [1] " Full TermMatrix:"
## <<DocumentTermMatrix (documents: 2657, terms: 747)>>
## Non-/sparse entries: 8448/1976331
## Sparsity : 100%
## Maximal term length: 15
## Weighting : term frequency - inverse document frequency (normalized) (tf-idf)
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## [1] " Sparse TermMatrix:"
## <<DocumentTermMatrix (documents: 2657, terms: 8)>>
## Non-/sparse entries: 2072/19184
## Sparsity : 90%
## Maximal term length: 7
## Weighting : term frequency - inverse document frequency (normalized) (tf-idf)
## Warning in myplot_scatter(plt_TfIdf_df, "freq.full", "TfIdf.full",
## colorcol_name = "in.sprs"): converting in.sprs to class:factor
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning: Removed 6 rows containing missing values (geom_path).
## Warning in rm(full_TfIdf_mtrx, full_TfIdf_df, melt_TfIdf_df,
## terms_TfIdf_df): object 'full_TfIdf_mtrx' not found
## label step_major step_minor bgn end elapsed
## 7 extract.features_report.DTM 6 0 31.609 33.722 2.113
## 8 extract.features_bind.DTM 7 0 33.722 NA NA
## Loading required package: tidyr
## [1] "Binding DTM for descr.my..."
## Warning in cor(TfIdf_mtrx, as.numeric(glb_allobs_df[, glb_txt_cor_var]), :
## the standard deviation is zero
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 83 rows containing missing values (geom_point).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
## Warning: Removed 659 rows containing missing values (geom_text).
## label step_major step_minor bgn end elapsed
## 8 extract.features_bind.DTM 7 0 33.722 40.551 6.829
## 9 extract.features_bind.DXM 8 0 40.552 NA NA
## [1] "Binding DXM for descr.my..."
## Warning in rm(log_X_df, txt_X_df): object 'log_X_df' not found
# Debug checkpoint (disabled): halt here, or snapshot / restore glb_allobs_df
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Use model info provided in description:
# list the observations whose description matches "a" followed by a digit,
# e.g. the Apple model numbers A1432 / A1430 / a1432 shown in the output below.
# NOTE(review): the hits suggest the match is case-insensitive -- confirm in mydsp_obs
mydsp_obs(list(description.contains="a[[:digit:]]"), cols=glb_dsp_cols, all=TRUE)
## UniqueID sold.fctr prdline.my sold .grpid color condition cellular
## 618 10618 Y iPad mini 1 <NA> Black Used 0
## 940 10940 N iPad 3 0 <NA> Black Used 1
## 2472 12474 <NA> Unknown NA <NA> Unknown Used Unknown
## carrier storage
## 618 None 16
## 940 Verizon 16
## 2472 Unknown Unknown
## descr.my
## 618 Nice Apple iPad Mini 16GB Wi- Fi 7.9" spacegray MF432LL/ A A1432 Locked It does work just cannot
## 940 LIKE NEW (MODEL A1430) + BLUETOOTH KEYBOARD (LATEST MODEL A1314), LEATHER CREAM SMART COVER, BLACK
## 2472 here we have spacegray apple ipad mini a1432 no charger works great has small nicks nothing major
# Manual corrections for listing 12474, whose description mentions a
# "spacegray apple ipad mini a1432"
obs_12474 <- glb_allobs_df$UniqueID == 12474
glb_allobs_df$prdline.my[obs_12474] <- "iPad mini"
glb_allobs_df$color[obs_12474]      <- "Space Gray"
glb_allobs_df$cellular[obs_12474]   <- "0"
glb_allobs_df$carrier[obs_12474]    <- "None"
rm(obs_12474)
# Next candidates: descriptions matching "m", any four characters, then "ll"
# (Apple part numbers such as MF432LL show up in the hits)
mydsp_obs(list(description.contains="m(.{4})ll"), cols=glb_dsp_cols, all=TRUE)
## UniqueID sold.fctr prdline.my sold .grpid color
## 617 10617 Y iPad 2 1 <NA> White
## 618 10618 Y iPad mini 1 <NA> Black
## 992 10992 N iPad 2 0 <NA> White
## 1105 11105 N iPad mini Retina 0 <NA> Gold
## 1359 11360 N iPad 3 0 <NA> Unknown
## 1360 11361 Y Unknown 1 <NA> Unknown
## 1365 11366 Y iPad 1 1 <NA> Unknown
## 2637 12639 <NA> iPad 2 NA <NA> Black
## condition cellular carrier storage
## 617 Used 0 None 64
## 618 Used 0 None 16
## 992 Used 0 None 16
## 1105 Used 0 None 16
## 1359 Used Unknown Unknown Unknown
## 1360 Used Unknown Unknown Unknown
## 1365 Used Unknown Unknown Unknown
## 2637 For parts or not working 0 None 64
## descr.my
## 617 This a used Apple iPad 2 64GB, Wi- Fi, 9.7in - White (MC991LL/ A) shows signs of wear, has been
## 618 Nice Apple iPad Mini 16GB Wi- Fi 7.9" spacegray MF432LL/ A A1432 Locked It does work just cannot
## 992 Up for auction is this APPLE iPad 1st Gen Model MB292LL 16 GB of Memory Storage 9.7" touch screen
## 1105 Like New Condition Apple iPad Mini 3 MGYE2LL/ A 16GB Wi- Fi Gold Version Tablet/ eReader. Includes USB
## 1359 iPad 3 Black 64Gb storage Model Mc707ll/ a iPad is in very nice shape, glass and case
## 1360 APPLE iPAD AIR 32GB WHITE MD789LL/ B WHITE. This item is Previously Lightly Used, in Good Condition.
## 1365 Item still in complete working order, minor scratches, normal wear and tear but no damage. screen is
## 2637 IPAD 2 64GB BLACK MODEL MC916LL/ A WIFI ONLY MODEL. PICTURE OF IPAD IS ACTUAL UNIT YOU WILL RECEIVE.
# Listing 11360: attributes filled in by hand after reviewing its description
obs_11360 <- glb_allobs_df$UniqueID == 11360
glb_allobs_df$color[obs_11360]    <- "Black"
glb_allobs_df$storage[obs_11360]  <- "64"
glb_allobs_df$cellular[obs_11360] <- "0"
glb_allobs_df$carrier[obs_11360]  <- "None"
# Listing 11361: product line and attributes filled in by hand
obs_11361 <- glb_allobs_df$UniqueID == 11361
glb_allobs_df$prdline.my[obs_11361] <- "iPad Air"
glb_allobs_df$storage[obs_11361]    <- "32"
glb_allobs_df$color[obs_11361]      <- "White"
glb_allobs_df$cellular[obs_11361]   <- "0"
glb_allobs_df$carrier[obs_11361]    <- "None"
rm(obs_11360, obs_11361)
# mydsp_obs(list(description.contains="mini(?!m)"), perl=TRUE, cols="D.P.mini", all=TRUE)
# mydsp_obs(list(D.P.mini=1), cols="D.P.mini", all=TRUE)
# mydsp_obs(list(D.P.mini=1, productline="Unknown"), cols="D.P.mini", all=TRUE)
# mydsp_obs(list(description.contains="(?<![fhp])air"), perl=TRUE, all=TRUE)
# mydsp_obs(list(description.contains="air"), perl=FALSE, cols="D.P.air", all=TRUE)
# mydsp_obs(list(D.P.air=1, productline="Unknown"), cols="D.P.air", all=TRUE)
# Cross-tabulate the curated product line against the raw one, the "mini"
# pattern indicator and the response
print(mycreate_sqlxtab_df(glb_allobs_df,
                          c("prdline.my", "productline", "D.P.mini", glb_rsp_var)))
## prdline.my productline D.P.mini sold.fctr .n
## 1 iPad 2 iPad 2 0 <NA> 154
## 2 iPad 2 iPad 2 0 Y 147
## 3 iPad 2 iPad 2 0 N 139
## 4 iPad mini iPad mini 0 N 138
## 5 iPad mini iPad mini 0 Y 126
## 6 iPad 1 iPad 1 0 Y 125
## 7 Unknown Unknown 0 N 121
## 8 iPad mini iPad mini 0 <NA> 108
## 9 iPad Air iPad Air 0 N 102
## 10 iPad 1 iPad 1 0 N 100
## 11 iPad Air 2 iPad Air 2 0 N 100
## 12 iPad 4 iPad 4 0 N 93
## 13 Unknown Unknown 0 <NA> 89
## 14 iPad 1 iPad 1 0 <NA> 88
## 15 Unknown Unknown 0 Y 80
## 16 iPad 3 iPad 3 0 Y 80
## 17 iPad Air iPad Air 0 Y 78
## 18 iPad Air iPad Air 0 <NA> 74
## 19 iPad 3 iPad 3 0 N 73
## 20 iPad Air 2 iPad Air 2 0 Y 71
## 21 iPad 4 iPad 4 0 <NA> 68
## 22 iPad 4 iPad 4 0 Y 64
## 23 iPad Air 2 iPad Air 2 0 <NA> 62
## 24 iPad mini 3 iPad mini 3 0 N 61
## 25 iPad mini 2 iPad mini 2 0 N 56
## 26 iPad 3 iPad 3 0 <NA> 55
## 27 iPad mini 2 iPad mini 2 0 <NA> 52
## 28 iPad mini 2 iPad mini 2 0 Y 48
## 29 iPad mini 3 iPad mini 3 0 <NA> 35
## 30 iPad mini 3 iPad mini 3 0 Y 27
## 31 iPad mini iPad mini 1 N 7
## 32 iPad mini iPad mini 1 Y 5
## 33 iPad mini 2 iPad mini 2 1 <NA> 4
## 34 iPad mini Retina iPad mini Retina 0 Y 4
## 35 iPad mini iPad mini 1 <NA> 3
## 36 iPad mini 3 iPad mini 3 1 <NA> 3
## 37 iPad mini Retina iPad mini Retina 0 N 3
## 38 Unknown Unknown 1 <NA> 2
## 39 iPad mini 2 iPad mini 2 1 N 2
## 40 iPad mini 3 iPad mini 3 1 N 2
## 41 Unknown Unknown 1 N 1
## 42 Unknown Unknown 1 Y 1
## 43 iPad 5 iPad 5 0 Y 1
## 44 iPad Air Unknown 0 Y 1
## 45 iPad mini Unknown 1 <NA> 1
## 46 iPad mini iPad mini 2 Y 1
## 47 iPad mini 2 iPad mini 2 1 Y 1
## 48 iPad mini Retina iPad mini Retina 1 N 1
# Review step: show the listings whose raw productline is "Unknown" but whose
# D.P.mini indicator is positive, with id, category, display and text columns.
# NOTE(review): D.P.mini looks like a per-description match count for "mini"
# (values 0/1/2 appear in the cross-tab) -- confirm where it is derived
print(glb_allobs_df[(glb_allobs_df$productline == "Unknown") &
(glb_allobs_df$D.P.mini > 0),
c(glb_id_var, glb_category_var, glb_dsp_cols, glb_txt_vars)])
## UniqueID prdline.my sold .grpid color condition
## 1172 11172 Unknown 0 8 Unknown Used
## 1803 11804 Unknown 1 <NA> White Seller refurbished
## 2223 12225 Unknown NA 8 Unknown Used
## 2472 12474 iPad mini NA <NA> Space Gray Used
## 2623 12625 Unknown NA <NA> White For parts or not working
## cellular carrier storage
## 1172 Unknown Unknown 16
## 1803 1 AT&T Unknown
## 2223 Unknown Unknown 16
## 2472 0 None Unknown
## 2623 Unknown Unknown Unknown
## descr.my
## 1172 IPAD mini . not sure of what generation it can be. selling as is or best offer. had a crack but
## 1803 30 Day Warranty. Refurbished iPad Mini with signs of normal wear including possible scratching on
## 2223 IPAD mini . not sure of what generation it can be. selling as is or best offer. had a crack but
## 2472 here we have spacegray apple ipad mini a1432 no charger works great has small nicks nothing major
## 2623 Lot of 10 mixed iPad minis. Colors, models & storage capacity vary between each lot. There may be
# Relabel every listing with an "Unknown" raw productline whose description
# matched the "mini" pattern. The filter is D.P.mini > 0 -- the same condition
# used for the review listing printed just before -- so that a description with
# more than one match (D.P.mini can be 2) is not silently left as "Unknown".
glb_allobs_df[(glb_allobs_df$D.P.mini > 0) & (glb_allobs_df$productline == "Unknown"),
              "prdline.my"] <- "iPad mini"
# Same cross-tab as before, now against the "air" pattern indicator
print(mycreate_sqlxtab_df(glb_allobs_df, c("prdline.my", "productline", "D.P.air",
                                           glb_rsp_var)))
## prdline.my productline D.P.air sold.fctr .n
## 1 iPad 2 iPad 2 0 <NA> 154
## 2 iPad 2 iPad 2 0 Y 147
## 3 iPad mini iPad mini 0 N 145
## 4 iPad 2 iPad 2 0 N 139
## 5 iPad mini iPad mini 0 Y 132
## 6 iPad 1 iPad 1 0 Y 125
## 7 Unknown Unknown 0 N 120
## 8 iPad mini iPad mini 0 <NA> 111
## 9 iPad 1 iPad 1 0 N 100
## 10 iPad Air iPad Air 0 N 98
## 11 iPad Air 2 iPad Air 2 0 N 97
## 12 iPad 4 iPad 4 0 N 92
## 13 Unknown Unknown 0 <NA> 88
## 14 iPad 1 iPad 1 0 <NA> 88
## 15 Unknown Unknown 0 Y 80
## 16 iPad 3 iPad 3 0 Y 79
## 17 iPad Air iPad Air 0 Y 75
## 18 iPad 3 iPad 3 0 N 73
## 19 iPad Air iPad Air 0 <NA> 73
## 20 iPad Air 2 iPad Air 2 0 Y 69
## 21 iPad 4 iPad 4 0 <NA> 68
## 22 iPad 4 iPad 4 0 Y 64
## 23 iPad mini 3 iPad mini 3 0 N 63
## 24 iPad Air 2 iPad Air 2 0 <NA> 60
## 25 iPad mini 2 iPad mini 2 0 N 58
## 26 iPad 3 iPad 3 0 <NA> 55
## 27 iPad mini 2 iPad mini 2 0 <NA> 55
## 28 iPad mini 2 iPad mini 2 0 Y 49
## 29 iPad mini 3 iPad mini 3 0 <NA> 38
## 30 iPad mini 3 iPad mini 3 0 Y 27
## 31 iPad Air iPad Air 1 N 4
## 32 iPad mini Retina iPad mini Retina 0 N 4
## 33 iPad mini Retina iPad mini Retina 0 Y 4
## 34 iPad Air iPad Air 1 Y 3
## 35 iPad mini Unknown 0 <NA> 3
## 36 iPad Air 2 iPad Air 2 1 <NA> 2
## 37 iPad Air 2 iPad Air 2 1 N 2
## 38 iPad Air 2 iPad Air 2 1 Y 2
## 39 Unknown Unknown 1 <NA> 1
## 40 Unknown Unknown 1 N 1
## 41 iPad 3 iPad 3 1 Y 1
## 42 iPad 4 iPad 4 1 N 1
## 43 iPad 5 iPad 5 0 Y 1
## 44 iPad Air Unknown 1 Y 1
## 45 iPad Air iPad Air 1 <NA> 1
## 46 iPad Air 2 iPad Air 2 2 N 1
## 47 iPad mini Unknown 0 N 1
## 48 iPad mini Unknown 0 Y 1
## 49 iPad mini 2 iPad mini 2 1 <NA> 1
# Review step: show the listings whose raw productline is "Unknown" but whose
# D.P.air indicator is positive, with id, category, display and text columns.
# NOTE(review): D.P.air looks like a per-description match count for "air" -- confirm
print(glb_allobs_df[(glb_allobs_df$productline == "Unknown") &
(glb_allobs_df$D.P.air > 0),
c(glb_id_var, glb_category_var, glb_dsp_cols, glb_txt_vars)])
## UniqueID prdline.my sold .grpid color condition cellular carrier
## 946 10946 Unknown 0 <NA> Unknown Used Unknown Unknown
## 1360 11361 iPad Air 1 <NA> White Used 0 None
## 2433 12435 Unknown NA <NA> Space Gray Used Unknown Unknown
## storage
## 946 Unknown
## 1360 32
## 2433 128
## descr.my
## 946 Gently used apple iPad Air, no scratches on screen and almost no visible wear on back of item. No
## 1360 APPLE iPAD AIR 32GB WHITE MD789LL/ B WHITE. This item is Previously Lightly Used, in Good Condition.
## 2433 ***128gb*** black/ spacegray iPad Air excellent used condition(no scratches, dents, or blemishes)
#glb_allobs_df[glb_allobs_df$UniqueID == 11863, "D.P.air"] <- 0
# Relabel every listing with an "Unknown" raw productline whose description
# matched the "air" pattern. The filter is D.P.air > 0 -- the same condition used
# for the review listing printed just before -- so that a description with more
# than one match (D.P.air can be 2) is not silently left as "Unknown".
glb_allobs_df[(glb_allobs_df$D.P.air > 0) & (glb_allobs_df$productline == "Unknown"),
              "prdline.my"] <- "iPad Air"
# Show three further listings that are corrected by hand below
print(glb_allobs_df[(glb_allobs_df$UniqueID %in% c(11767, 11811, 12156)),
                    c(glb_id_var, "sold",
                      "prdline.my", "color", "condition", "cellular", "carrier", "storage", "descr.my")])
## UniqueID sold prdline.my color condition cellular
## 1766 11767 0 Unknown Unknown For parts or not working Unknown
## 1810 11811 0 Unknown Black Seller refurbished 0
## 2154 12156 NA Unknown Black Used 0
## carrier storage
## 1766 Unknown Unknown
## 1810 None Unknown
## 2154 None 32
## descr.my
## 1766 Ipad 2 32gb Housing. Some scratches and small dents, but overall good condition.
## 1810 30 Day Warranty. Refurbished iPad 2 with scratching on screen and wear on back plate. Comes with
## 2154 Original IPAD 1st generation - used one owner (myself)Good shape as pictured. Fully functional as
# Hand corrections taken from the three descriptions printed above
glb_allobs_df$prdline.my[glb_allobs_df$UniqueID %in% c(11767, 11811)] <- "iPad 2"
glb_allobs_df$storage[glb_allobs_df$UniqueID == 11767] <- "32"
glb_allobs_df$prdline.my[glb_allobs_df$UniqueID == 12156] <- "iPad 1"
# mydsp_obs(list(prdline.my="Unknown"), all=TRUE)

# Keep the pre-consolidation labels so old and new values can be cross-tabulated
tmp_allobs_df <- glb_allobs_df["prdline.my"]
colnames(tmp_allobs_df) <- "old.prdline.my"

# Collapse the detailed product lines into coarser groups
glb_allobs_df[["prdline.my"]] <- plyr::revalue(
    glb_allobs_df[["prdline.my"]],
    replace = c(
        # "iPad 1"    = "iPad",
        # "iPad 2"    = "iPad2+",
        "iPad 3"           = "iPad 3+",
        "iPad 4"           = "iPad 3+",
        "iPad 5"           = "iPad 3+",
        "iPad Air"         = "iPadAir",
        "iPad Air 2"       = "iPadAir",
        "iPad mini"        = "iPadmini",
        "iPad mini 2"      = "iPadmini 2+",
        "iPad mini 3"      = "iPadmini 2+",
        "iPad mini Retina" = "iPadmini 2+"
    )
)

tmp_allobs_df[["prdline.my"]] <- glb_allobs_df[["prdline.my"]]
print(mycreate_sqlxtab_df(tmp_allobs_df, c("prdline.my", "old.prdline.my")))
## prdline.my old.prdline.my .n
## 1 iPad 2 iPad 2 442
## 2 iPadmini iPad mini 393
## 3 iPad 1 iPad 1 314
## 4 Unknown Unknown 285
## 5 iPadAir iPad Air 257
## 6 iPadAir iPad Air 2 233
## 7 iPad 3+ iPad 4 225
## 8 iPad 3+ iPad 3 208
## 9 iPadmini 2+ iPad mini 2 163
## 10 iPadmini 2+ iPad mini 3 128
## 11 iPadmini 2+ iPad mini Retina 8
## 12 iPad 3+ iPad 5 1
# Counts (.n) per consolidated prdline.my value
print(mycreate_sqlxtab_df(tmp_allobs_df, c("prdline.my")))
## prdline.my .n
## 1 iPadAir 490
## 2 iPad 2 442
## 3 iPad 3+ 434
## 4 iPadmini 393
## 5 iPad 1 314
## 6 iPadmini 2+ 299
## 7 Unknown 285
# Restrict to listings whose color is "Unknown" and cross-tabulate the
# description-derived color indicators (D.P.black / gold / spacegray / white)
# to find colors that can be recovered from the description text
print(mycreate_sqlxtab_df(subset(glb_allobs_df, color == "Unknown"),
c("color", "D.P.black", "D.P.gold", "D.P.spacegray", "D.P.white")))
## color D.P.black D.P.gold D.P.spacegray D.P.white .n
## 1 Unknown 0 0 0 0 1017
## 2 Unknown 0 0 0 1 4
## 3 Unknown 1 0 0 0 4
## 4 Unknown 0 0 1 0 1
## 5 Unknown 1 0 0 1 1
# Review step: listings with color "Unknown" whose D.P.black indicator is positive;
# descr.my is included so each hit can be judged by eye
print(glb_allobs_df[(glb_allobs_df$color == "Unknown") & (glb_allobs_df$D.P.black > 0),
c(glb_id_var, "color", "D.P.black", "sold", "prdline.my", "condition",
"cellular", "carrier", "storage", "descr.my")])
## UniqueID color D.P.black sold prdline.my condition cellular carrier
## 631 10631 Unknown 1 1 iPad 2 Used 1 AT&T
## 683 10683 Unknown 1 0 iPad 2 Used 0 None
## 858 10858 Unknown 1 1 iPad 3+ Used 0 None
## 1243 11244 Unknown 1 0 Unknown Used Unknown Unknown
## 2135 12137 Unknown 1 NA iPad 1 Used 1 AT&T
## storage
## 631 16
## 683 32
## 858 16
## 1243 Unknown
## 2135 16
## descr.my
## 631 Very good condition. Minor bumps and bruises. Only scratches on screen are in non- viewing black
## 683 Comes with folding black case and is engraved in small letters on the back. Still works perfectly
## 858 screen cracked. name engraving in the back (blacked out)
## 1243 Ipad is in fair condition. Minor scratches on back. Edge around screen is black instead of white.
## 2135 Device is in AVERAGE used cosmetic condition with heavy scratches and wear. Color is black . Device is
# Of the hits above only listing 12137 is corrected; its description reads
# "Color is black"
glb_allobs_df$color[glb_allobs_df$UniqueID == 12137] <- "Black"
# Same review for the spacegray indicator
print(glb_allobs_df[(glb_allobs_df$color == "Unknown") &
                        (glb_allobs_df$D.P.spacegray > 0),
                    c(glb_id_var, "color", "D.P.spacegray", "prdline.my", "condition",
                      "cellular", "carrier", "storage", "descr.my")])
## UniqueID color D.P.spacegray prdline.my condition cellular carrier
## 2104 12106 Unknown 1 iPadAir Used 0 None
## storage
## 2104 16
## descr.my
## 2104 This is an iPad Air first generation (spacegray color). It's a used iPad (just like new) as shown in the
# Listing 12106 describes itself as "(spacegray color)"
glb_allobs_df$color[glb_allobs_df$UniqueID == 12106] <- "Space Gray"
# Same review for the white indicator
print(glb_allobs_df[(glb_allobs_df$color == "Unknown") &
                        (glb_allobs_df$D.P.white > 0),
                    c(glb_id_var, "color", "D.P.white", "prdline.my", "condition",
                      "cellular", "carrier", "storage", "descr.my")])
## UniqueID color D.P.white prdline.my condition
## 573 10573 Unknown 1 iPadmini 2+ Used
## 809 10809 Unknown 1 iPad 3+ Used
## 925 10925 Unknown 1 iPadmini 2+ Used
## 1243 11244 Unknown 1 Unknown Used
## 1734 11735 Unknown 1 iPad 3+ For parts or not working
## cellular carrier storage
## 573 0 None 16
## 809 0 None 64
## 925 0 None 64
## 1243 Unknown Unknown Unknown
## 1734 1 Verizon 16
## descr.my
## 573 Like new white iPad mini no scratches always kept in case, sold with keyboard, box and cords
## 809 iPad 3 gen. 64GB, white, wifi- only. Condition = good as new, very minor sign of use. No charger.
## 925 iPad mini 2/ Retina Display/ Latest Model/ 64GB/ Wi- Fi/ Silver&White . Near Mint Condition excellent
## 1243 Ipad is in fair condition. Minor scratches on back. Edge around screen is black instead of white.
## 1734 Device is in POOR used cosmetic condition with cracked outer glass. Color is White. Device is
# Four of the hits above are set to white by hand
glb_allobs_df$color[glb_allobs_df$UniqueID %in% c(10573, 10809, 10925, 11735)] <- "White"

# Factor versions (<name>.fctr) of the curated string attributes
fctr_src_cols <- c("carrier", "cellular", "color", "prdline.my", "storage")
glb_allobs_df[paste0(fctr_src_cols, ".fctr")] <-
    lapply(glb_allobs_df[fctr_src_cols], as.factor)
rm(fctr_src_cols)
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# glb_allobs_df %>%
# unite(prdl.my.descr, c(prdline.my, as.numeric(D.nchrs.log > 0), sep="#"))

# Category label "<prdline.my>#<0|1>", where the flag is 1 when D.nchrs.log > 0
glb_allobs_df$prdl.my.descr.fctr <-
    as.factor(paste0(glb_allobs_df$prdline.my,
                     "#",
                     as.numeric(glb_allobs_df$D.nchrs.log > 0)))
print(table(glb_allobs_df$prdl.my.descr.fctr, glb_allobs_df$sold, useNA = "ifany"))
##
## 0 1 <NA>
## Unknown#0 72 47 45
## Unknown#1 46 33 42
## iPad 1#0 53 69 46
## iPad 1#1 47 56 43
## iPad 2#0 57 80 83
## iPad 2#1 84 67 71
## iPad 3+#0 58 87 59
## iPad 3+#1 108 58 64
## iPadAir#0 125 95 88
## iPadAir#1 78 55 49
## iPadmini 2+#0 95 59 64
## iPadmini 2+#1 30 21 30
## iPadmini#0 94 79 65
## iPadmini#1 52 54 49
# The same category-by-sold counts in long form (one row per combination)
print(mycreate_sqlxtab_df(glb_allobs_df, c("prdl.my.descr.fctr", "sold")))
## prdl.my.descr.fctr sold .n
## 1 iPadAir#0 0 125
## 2 iPad 3+#1 0 108
## 3 iPadAir#0 1 95
## 4 iPadmini 2+#0 0 95
## 5 iPadmini#0 0 94
## 6 iPadAir#0 NA 88
## 7 iPad 3+#0 1 87
## 8 iPad 2#1 0 84
## 9 iPad 2#0 NA 83
## 10 iPad 2#0 1 80
## 11 iPadmini#0 1 79
## 12 iPadAir#1 0 78
## 13 Unknown#0 0 72
## 14 iPad 2#1 NA 71
## 15 iPad 1#0 1 69
## 16 iPad 2#1 1 67
## 17 iPadmini#0 NA 65
## 18 iPad 3+#1 NA 64
## 19 iPadmini 2+#0 NA 64
## 20 iPad 3+#0 NA 59
## 21 iPadmini 2+#0 1 59
## 22 iPad 3+#0 0 58
## 23 iPad 3+#1 1 58
## 24 iPad 2#0 0 57
## 25 iPad 1#1 1 56
## 26 iPadAir#1 1 55
## 27 iPadmini#1 1 54
## 28 iPad 1#0 0 53
## 29 iPadmini#1 0 52
## 30 iPadAir#1 NA 49
## 31 iPadmini#1 NA 49
## 32 Unknown#0 1 47
## 33 iPad 1#1 0 47
## 34 Unknown#1 0 46
## 35 iPad 1#0 NA 46
## 36 Unknown#0 NA 45
## 37 iPad 1#1 NA 43
## 38 Unknown#1 NA 42
## 39 Unknown#1 1 33
## 40 iPadmini 2+#1 NA 30
## 41 iPadmini 2+#1 0 30
## 42 iPadmini 2+#1 1 21
# From here on the combined product-line / description flag is the category variable
glb_category_var <- "prdl.my.descr.fctr"
# print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
# print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))
# print(myplot_scatter(glb_trnobs_df, "<col1_name>", "<col2_name>", smooth=TRUE))

# Drop the intermediate text-processing objects. Some of them are not created on
# every run, so remove only those that currently exist instead of letting rm()
# warn "object ... not found" for each absent one.
rm(list = intersect(ls(),
                    c("corpus_lst", "full_TfIdf_DTM", "full_TfIdf_vctr",
                      "glb_full_DTM_lst", "glb_sprs_DTM_lst", "txt_corpus", "txt_vctr")))
## Warning in rm(corpus_lst, full_TfIdf_DTM, full_TfIdf_vctr,
## glb_full_DTM_lst, : object 'corpus_lst' not found
## Warning in rm(corpus_lst, full_TfIdf_DTM, full_TfIdf_vctr,
## glb_full_DTM_lst, : object 'full_TfIdf_vctr' not found
# Add the closing "extract.features_end" entry to the per-step timing table;
# major.inc=TRUE requests a new major step number (9 in the output below)
extract.features_chunk_df <- myadd_chunk(extract.features_chunk_df, "extract.features_end",
major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 9 extract.features_bind.DXM 8 0 40.552 51.802 11.25
## 10 extract.features_end 9 0 51.802 NA NA
# Report the extract.features sub-steps; the output below lists them by
# decreasing duration together with the total elapsed time
myplt_chunk(extract.features_chunk_df)
## label step_major
## 9 extract.features_bind.DXM 8
## 5 extract.features_build.corpus 4
## 8 extract.features_bind.DTM 7
## 7 extract.features_report.DTM 6
## 3 extract.features_process.text 3
## 6 extract.features_extract.DTM 5
## 2 extract.features_factorize.str.vars 2
## 1 extract.features_bgn 1
## 4 extract.features_process.text_reporting_compound_terms 3
## step_minor bgn end elapsed duration
## 9 0 40.552 51.802 11.250 11.250
## 5 0 19.794 30.382 10.588 10.588
## 8 0 33.722 40.551 6.829 6.829
## 7 0 31.609 33.722 2.113 2.113
## 3 0 18.174 19.789 1.615 1.615
## 6 0 30.382 31.608 1.226 1.226
## 2 0 17.889 18.173 0.284 0.284
## 1 0 17.874 17.888 0.014 0.014
## 4 1 19.789 19.794 0.005 0.005
## [1] "Total Elapsed Time: 51.802 secs"
# Optional checkpoint (disabled): save / reload the extracted features on disk
# if (glb_save_envir)
# save(glb_feats_df,
# glb_allobs_df, #glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
# file=paste0(glb_out_pfx, "extract_features_dsk.RData"))
# load(paste0(glb_out_pfx, "extract_features_dsk.RData"))
# Append "data.training.all" and "data.new" to glb_analytics_avl_objs and pass the
# extended vector as the transitions to replay on the analytics Petri net.
# NOTE(review): replay.petrisim is defined elsewhere -- confirm its semantics there
replay.petrisim(pn=glb_analytics_pn,
replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
"data.training.all","data.new")), flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
# Open the top-level "cluster.data" step in the timing table (new major step)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "cluster.data", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 5 extract.features 3 0 17.868 53.117 35.249
## 6 cluster.data 4 0 53.117 NA NA
4.0: cluster data
glb_chunks_df <- myadd_chunk(glb_chunks_df, "manage.missing.data", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 6 cluster.data 4 0 53.117 54.646 1.529
## 7 manage.missing.data 4 1 54.646 NA NA
# If mice crashes with error: Error in get(as.character(FUN), mode = "function", envir = envir) : object 'State' of mode 'function' was not found
# consider excluding 'State' as a feature
# Alternatives to imputation (disabled): count NAs per column, drop incomplete
# rows, or zero-fill
# print(sapply(names(glb_trnobs_df), function(col) sum(is.na(glb_trnobs_df[, col]))))
# print(sapply(names(glb_newobs_df), function(col) sum(is.na(glb_newobs_df[, col]))))
# glb_trnobs_df <- na.omit(glb_trnobs_df)
# glb_newobs_df <- na.omit(glb_newobs_df)
# df[is.na(df)] <- 0
# Report columns with missing values, zeros, Infs and NaNs (see output below)
mycheck_problem_data(glb_allobs_df)
## [1] "numeric data missing in : "
## sold sold.fctr
## 798 798
## [1] "numeric data w/ 0s in : "
## biddable sold startprice.log
## 1444 999 31
## cellular.fctr D.terms.n.post.stop D.terms.n.post.stop.log
## 1600 1521 1521
## D.TfIdf.sum.post.stop D.terms.n.post.stem D.terms.n.post.stem.log
## 1521 1521 1521
## D.TfIdf.sum.post.stem D.T.condit D.T.use
## 1521 2158 2366
## D.T.scratch D.T.new D.T.good
## 2371 2501 2460
## D.T.screen D.T.great D.T.ipad
## 2444 2532 2425
## D.T.work D.T.excel D.T.like
## 2459 2557 2584
## D.T.box D.T.function. D.T.item
## 2547 2541 2528
## D.T.fulli D.T.cosmet D.T.minor
## 2569 2540 2540
## D.T.mint D.T.crack D.T.wear
## 2594 2580 2556
## D.T.perfect D.T.includ D.T.lock
## 2602 2574 2614
## D.T.case D.T.icloud D.T.see
## 2575 2601 2604
## D.T.light D.T.devic D.T.pleas
## 2576 2577 2590
## D.T.back D.T.origin D.T.dent
## 2580 2599 2592
## D.T.hous D.T.sign D.T.open
## 2585 2580 2613
## D.T.clean D.T.will D.T.appl
## 2615 2618 2598
## D.T.charger D.T.damag D.T.X100
## 2619 2626 2593
## D.T.come D.T.scuff D.T.corner
## 2602 2615 2612
## D.T.small D.T.broken D.T.descript
## 2611 2637 2624
## D.T.unit D.T.refurbish D.T.show
## 2617 2623 2606
## D.T.shape D.T.read D.T.test
## 2632 2626 2620
## D.T.pictur D.T.bare D.T.brand
## 2624 2637 2627
## D.T.list D.T.may D.T.mark
## 2616 2619 2629
## D.T.blemish D.T.packag D.T.mini
## 2625 2631 2623
## D.T.affect D.T.normal D.T.tab
## 2629 2626 2630
## D.T.top D.T.accessori D.T.ding
## 2633 2629 2632
## D.T.near D.T.digit D.T.photo
## 2623 2639 2634
## D.T.tear D.T.display D.T.minim
## 2626 2634 2629
## D.T.wifi D.T.order D.T.protector
## 2632 2636 2639
## D.T.kept D.T.right D.T.previous
## 2637 2638 2634
## D.T.button D.T.alway D.T.contact
## 2638 2639 2642
## D.T.fair D.T.air D.T.esn
## 2635 2636 2641
## D.T.full D.T.averag D.T.free
## 2641 2642 2638
## D.T.sinc D.T.imei D.T.cabl
## 2640 2640 2639
## D.T.seal D.T.profession D.T.overal
## 2647 2641 2643
## D.T.retail D.T.refer D.T.left
## 2648 2646 2646
## D.T.stock D.T.two D.T.detail
## 2643 2648 2650
## D.T.bodi D.T.seller D.T.activ
## 2648 2643 2648
## D.T.phone D.T.problem D.T.manufactur
## 2647 2651 2649
## D.T.side D.T.certifi D.T.ship
## 2648 2647 2646
## D.T.chip D.T.edg D.T.inspect
## 2651 2647 2648
## D.T.heavili D.T.keyboard D.T.non
## 2646 2651 2649
## D.T.geek D.T.squad D.T.handset
## 2652 2652 2650
## D.T.upper D.T.sticker D.T.scroll
## 2651 2649 2652
## D.T.must D.T.contain D.T.imag
## 2649 2652 2654
## D.T.qualiti D.T.anoth D.T.pic
## 2651 2652 2653
## D.T.least D.T.correct D.T.featur
## 2653 2652 2652
## D.T.technician D.T.super D.T.expect
## 2652 2655 2655
## D.T.sync D.T.speaker D.T.name
## 2652 2654 2654
## D.T.lightn D.T.X2016 D.T.passcod
## 2652 2653 2654
## D.T.money D.T.els D.T.stylus
## 2655 2654 2655
## D.T.corpor D.T.intro D.T.higher
## 2655 2656 2656
## D.T.beetl D.T.defens D.T.disclaim
## 2656 2656 2656
## D.T.essenti D.T.final D.T.repeat.
## 2656 2656 2656
## D.nwrds.log D.nwrds.unq.log D.sum.TfIdf
## 1520 1521 1521
## D.ratio.sum.TfIdf.nwrds D.nchrs.log D.nuppr.log
## 1521 1520 1522
## D.ndgts.log D.npnct01.log D.npnct02.log
## 2427 2579 2657
## D.npnct03.log D.npnct04.log D.npnct05.log
## 2614 2657 2592
## D.npnct06.log D.npnct07.log D.npnct08.log
## 2554 2656 2581
## D.npnct09.log D.npnct10.log D.npnct11.log
## 2641 2648 2301
## D.npnct12.log D.npnct13.log D.npnct14.log
## 2538 1932 2582
## D.npnct15.log D.npnct16.log D.npnct17.log
## 2637 2546 2657
## D.npnct18.log D.npnct19.log D.npnct20.log
## 2656 2657 2657
## D.npnct21.log D.npnct22.log D.npnct23.log
## 2657 2657 2657
## D.npnct24.log D.npnct25.log D.npnct26.log
## 1520 2657 2657
## D.npnct27.log D.npnct28.log D.npnct29.log
## 2657 2649 2657
## D.npnct30.log D.nstopwrds.log D.P.http
## 2657 1664 2657
## D.P.mini D.P.air D.P.black
## 2623 2636 2640
## D.P.white D.P.gold D.P.spacegray
## 2647 2655 2650
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description condition cellular carrier color storage
## 1520 0 0 0 0 0
## productline .grpid prdline.my descr.my
## 0 NA 0 1520
# glb_allobs_df <- na.omit(glb_allobs_df)
# Not refactored into mydsutils.R since glb_*_df might be reassigned
glb_impute_missing_data <- function() {
    # Impute missing values in the candidate feature columns of glb_allobs_df
    # with mice and report which columns were actually changed.
    #
    # Reads globals: glb_allobs_df, glb_exclude_vars_as_features, glb_rsp_var,
    #   glb_mice_complete.seed.
    # Returns: a data.frame holding only the columns whose values differ after
    #   imputation (zero columns when nothing changed). It is always a
    #   data.frame -- also when exactly one column was imputed -- so callers can
    #   rely on ncol() and names().
    # Side effects: prints a summary of the data before and after imputation and
    #   sets the RNG seed.
    require(mice)
    set.seed(glb_mice_complete.seed)

    # Everything except the excluded variables and the response is a candidate.
    # drop = FALSE keeps a data.frame even if a single candidate column remains.
    inp_impent_df <- glb_allobs_df[, setdiff(names(glb_allobs_df),
                                             union(glb_exclude_vars_as_features,
                                                   glb_rsp_var)),
                                   drop = FALSE]
    print("Summary before imputation: ")
    print(summary(inp_impent_df))
    out_impent_df <- complete(mice(inp_impent_df))
    print(summary(out_impent_df))

    # Columns whose contents are not identical before vs. after imputation.
    # vapply() yields logical(0) for a zero-column frame, where sapply() would
    # have produced an unusable list().
    is_changed <- vapply(names(out_impent_df),
                         function(col) !identical(out_impent_df[[col]],
                                                  inp_impent_df[[col]]),
                         logical(1))
    ret_vars <- names(out_impent_df)[is_changed]

    # complete(mice()) changes attributes of factors even though values don't change
    for (col in ret_vars) {
        if (inherits(out_impent_df[[col]], "factor") &&
            identical(as.numeric(out_impent_df[[col]]),
                      as.numeric(inp_impent_df[[col]])))
            ret_vars <- setdiff(ret_vars, col)
    }

    # drop = FALSE: without it a single imputed column would be returned as a
    # bare vector and the caller's ncol(nonna_df) / names(nonna_df) would fail
    return(out_impent_df[, ret_vars, drop = FALSE])
}
# Apply imputation only when all three conditions hold, evaluated left to right:
#   1. glb_impute_na_data is set,
#   2. myfind_numerics_missing() reports at least one entry for glb_allobs_df,
#   3. glb_impute_missing_data() returned at least one column.
# The && chain short-circuits, so glb_impute_missing_data() is called -- and
# nonna_df assigned -- only if the first two conditions are TRUE.
if (glb_impute_na_data &&
(length(myfind_numerics_missing(glb_allobs_df)) > 0) &&
(ncol(nonna_df <- glb_impute_missing_data()) > 0)) {
for (col in names(nonna_df)) {
# Keep the original column and add the returned values next to it as <col>.nonNA
glb_allobs_df[, paste0(col, ".nonNA")] <- nonna_df[, col]
# The original column is then added to the list of variables excluded as features
glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features, col)
}
}
# Re-run the data checks.
# NOTE(review): terminate = TRUE presumably aborts on remaining problems -- confirm in mycheck_problem_data
mycheck_problem_data(glb_allobs_df, terminate = TRUE)
## [1] "numeric data missing in : "
## sold sold.fctr
## 798 798
## [1] "numeric data w/ 0s in : "
## biddable sold startprice.log
## 1444 999 31
## cellular.fctr D.terms.n.post.stop D.terms.n.post.stop.log
## 1600 1521 1521
## D.TfIdf.sum.post.stop D.terms.n.post.stem D.terms.n.post.stem.log
## 1521 1521 1521
## D.TfIdf.sum.post.stem D.T.condit D.T.use
## 1521 2158 2366
## D.T.scratch D.T.new D.T.good
## 2371 2501 2460
## D.T.screen D.T.great D.T.ipad
## 2444 2532 2425
## D.T.work D.T.excel D.T.like
## 2459 2557 2584
## D.T.box D.T.function. D.T.item
## 2547 2541 2528
## D.T.fulli D.T.cosmet D.T.minor
## 2569 2540 2540
## D.T.mint D.T.crack D.T.wear
## 2594 2580 2556
## D.T.perfect D.T.includ D.T.lock
## 2602 2574 2614
## D.T.case D.T.icloud D.T.see
## 2575 2601 2604
## D.T.light D.T.devic D.T.pleas
## 2576 2577 2590
## D.T.back D.T.origin D.T.dent
## 2580 2599 2592
## D.T.hous D.T.sign D.T.open
## 2585 2580 2613
## D.T.clean D.T.will D.T.appl
## 2615 2618 2598
## D.T.charger D.T.damag D.T.X100
## 2619 2626 2593
## D.T.come D.T.scuff D.T.corner
## 2602 2615 2612
## D.T.small D.T.broken D.T.descript
## 2611 2637 2624
## D.T.unit D.T.refurbish D.T.show
## 2617 2623 2606
## D.T.shape D.T.read D.T.test
## 2632 2626 2620
## D.T.pictur D.T.bare D.T.brand
## 2624 2637 2627
## D.T.list D.T.may D.T.mark
## 2616 2619 2629
## D.T.blemish D.T.packag D.T.mini
## 2625 2631 2623
## D.T.affect D.T.normal D.T.tab
## 2629 2626 2630
## D.T.top D.T.accessori D.T.ding
## 2633 2629 2632
## D.T.near D.T.digit D.T.photo
## 2623 2639 2634
## D.T.tear D.T.display D.T.minim
## 2626 2634 2629
## D.T.wifi D.T.order D.T.protector
## 2632 2636 2639
## D.T.kept D.T.right D.T.previous
## 2637 2638 2634
## D.T.button D.T.alway D.T.contact
## 2638 2639 2642
## D.T.fair D.T.air D.T.esn
## 2635 2636 2641
## D.T.full D.T.averag D.T.free
## 2641 2642 2638
## D.T.sinc D.T.imei D.T.cabl
## 2640 2640 2639
## D.T.seal D.T.profession D.T.overal
## 2647 2641 2643
## D.T.retail D.T.refer D.T.left
## 2648 2646 2646
## D.T.stock D.T.two D.T.detail
## 2643 2648 2650
## D.T.bodi D.T.seller D.T.activ
## 2648 2643 2648
## D.T.phone D.T.problem D.T.manufactur
## 2647 2651 2649
## D.T.side D.T.certifi D.T.ship
## 2648 2647 2646
## D.T.chip D.T.edg D.T.inspect
## 2651 2647 2648
## D.T.heavili D.T.keyboard D.T.non
## 2646 2651 2649
## D.T.geek D.T.squad D.T.handset
## 2652 2652 2650
## D.T.upper D.T.sticker D.T.scroll
## 2651 2649 2652
## D.T.must D.T.contain D.T.imag
## 2649 2652 2654
## D.T.qualiti D.T.anoth D.T.pic
## 2651 2652 2653
## D.T.least D.T.correct D.T.featur
## 2653 2652 2652
## D.T.technician D.T.super D.T.expect
## 2652 2655 2655
## D.T.sync D.T.speaker D.T.name
## 2652 2654 2654
## D.T.lightn D.T.X2016 D.T.passcod
## 2652 2653 2654
## D.T.money D.T.els D.T.stylus
## 2655 2654 2655
## D.T.corpor D.T.intro D.T.higher
## 2655 2656 2656
## D.T.beetl D.T.defens D.T.disclaim
## 2656 2656 2656
## D.T.essenti D.T.final D.T.repeat.
## 2656 2656 2656
## D.nwrds.log D.nwrds.unq.log D.sum.TfIdf
## 1520 1521 1521
## D.ratio.sum.TfIdf.nwrds D.nchrs.log D.nuppr.log
## 1521 1520 1522
## D.ndgts.log D.npnct01.log D.npnct02.log
## 2427 2579 2657
## D.npnct03.log D.npnct04.log D.npnct05.log
## 2614 2657 2592
## D.npnct06.log D.npnct07.log D.npnct08.log
## 2554 2656 2581
## D.npnct09.log D.npnct10.log D.npnct11.log
## 2641 2648 2301
## D.npnct12.log D.npnct13.log D.npnct14.log
## 2538 1932 2582
## D.npnct15.log D.npnct16.log D.npnct17.log
## 2637 2546 2657
## D.npnct18.log D.npnct19.log D.npnct20.log
## 2656 2657 2657
## D.npnct21.log D.npnct22.log D.npnct23.log
## 2657 2657 2657
## D.npnct24.log D.npnct25.log D.npnct26.log
## 1520 2657 2657
## D.npnct27.log D.npnct28.log D.npnct29.log
## 2657 2649 2657
## D.npnct30.log D.nstopwrds.log D.P.http
## 2657 1664 2657
## D.P.mini D.P.air D.P.black
## 2623 2636 2640
## D.P.white D.P.gold D.P.spacegray
## 2647 2655 2650
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description condition cellular carrier color storage
## 1520 0 0 0 0 0
## productline .grpid prdline.my descr.my
## 0 NA 0 1520
4.1: manage missing data
if (glb_cluster) {
require(proxy)
#require(hash)
require(dynamicTreeCut)
require(entropy)
require(tidyr)
# glb_hash <- hash(key=unique(glb_allobs_df$myCategory),
# values=1:length(unique(glb_allobs_df$myCategory)))
# glb_hash_lst <- hash(key=unique(glb_allobs_df$myCategory),
# values=1:length(unique(glb_allobs_df$myCategory)))
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
print("Clustering features: ")
print(cluster_vars <- grep(paste0("[",
toupper(paste0(substr(glb_txt_vars, 1, 1), collapse="")),
"]\\.[PT]\\."),
names(glb_allobs_df), value=TRUE))
print(sprintf("glb_allobs_df Entropy: %0.4f",
allobs_ent <- entropy(table(glb_allobs_df[, glb_cluster_entropy_var]),
method="ML")))
category_df <- as.data.frame(table(glb_allobs_df[, glb_category_var],
glb_allobs_df[, glb_cluster_entropy_var]))
names(category_df)[c(1, 2)] <- c(glb_category_var, glb_cluster_entropy_var)
category_df <- do.call(tidyr::spread,
list(category_df, glb_cluster_entropy_var, "Freq"))
tmp.entropy <- sapply(1:nrow(category_df),
function(row) entropy(as.numeric(category_df[row, -1]), method="ML"))
tmp.knt <- sapply(1:nrow(category_df),
function(row) sum(as.numeric(category_df[row, -1])))
category_df$.entropy <- tmp.entropy; category_df$.knt <- tmp.knt
print(sprintf("glb_allobs_df$%s Entropy: %0.4f (%0.4f pct)", glb_category_var,
category_ent <- weighted.mean(category_df$.entropy, category_df$.knt),
100 * category_ent / allobs_ent))
print(category_df)
glb_allobs_df$.clusterid <- 1
#print(max(table(glb_allobs_df$myCategory.fctr) / 20))
for (grp in sort(unique(glb_allobs_df[, glb_category_var]))) {
print(sprintf("Category: %s", grp))
ctgry_allobs_df <- glb_allobs_df[glb_allobs_df[, glb_category_var] == grp, ]
if (!inherits(ctgry_allobs_df[, glb_cluster_entropy_var], "factor"))
ctgry_allobs_df[, glb_cluster_entropy_var] <-
as.factor(ctgry_allobs_df[, glb_cluster_entropy_var])
dstns_dist <- dist(ctgry_allobs_df[, cluster_vars], method = "cosine")
dstns_mtrx <- as.matrix(dstns_dist)
print(sprintf("max distance(%0.4f) pair:", max(dstns_mtrx)))
row_ix <- ceiling(which.max(dstns_mtrx) / ncol(dstns_mtrx))
col_ix <- which.max(dstns_mtrx[row_ix, ])
print(ctgry_allobs_df[c(row_ix, col_ix),
c(glb_id_var, glb_cluster_entropy_var, glb_category_var, glb_txt_vars, cluster_vars)])
min_dstns_mtrx <- dstns_mtrx
diag(min_dstns_mtrx) <- 1
# Float representations issue -2.22e-16 vs. 0.0000
print(sprintf("min distance(%0.4f) pair:", min(min_dstns_mtrx)))
row_ix <- ceiling(which.min(min_dstns_mtrx) / ncol(min_dstns_mtrx))
col_ix <- which.min(min_dstns_mtrx[row_ix, ])
print(ctgry_allobs_df[c(row_ix, col_ix),
c(glb_id_var, glb_cluster_entropy_var, glb_category_var, glb_txt_vars,
cluster_vars)])
set.seed(glb_cluster.seed)
clusters <- hclust(dstns_dist, method = "ward.D2")
#plot(clusters, labels=NULL, hang=-1)
myplclust(clusters, lab.col=unclass(ctgry_allobs_df[, glb_cluster_entropy_var]))
#clusterGroups = cutree(clusters, k=7)
clusterGroups <- cutreeDynamic(clusters, minClusterSize=10, method="tree",
deepSplit=0)
# Unassigned groups are labeled 0; the largest group has label 1
table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var], useNA="ifany")
#print(ctgry_allobs_df[which(clusterGroups == 1), c("UniqueID", "Popular", "Headline")])
#print(ctgry_allobs_df[(clusterGroups == 1) & !is.na(ctgry_allobs_df$Popular) & (ctgry_allobs_df$Popular==1), c("UniqueID", "Popular", "Headline")])
clusterGroups[clusterGroups == 0] <- 1
table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var], useNA="ifany")
#summary(factor(clusterGroups))
# clusterGroups <- clusterGroups +
# 100 * # has to be > max(table(glb_allobs_df[, glb_category_var].fctr) / minClusterSize=20)
# which(levels(glb_allobs_df[, glb_category_var].fctr) == grp)
# table(clusterGroups, ctgry_allobs_df[, glb_cluster_entropy_var], useNA="ifany")
# add to glb_allobs_df - then split the data again
glb_allobs_df[glb_allobs_df[, glb_category_var]==grp,]$.clusterid <- clusterGroups
#print(unique(glb_allobs_df$.clusterid))
#print(glb_feats_df[glb_feats_df$id == ".clusterid.fctr", ])
}
cluster_df <- as.data.frame(table(glb_allobs_df[, glb_category_var],
glb_allobs_df[, ".clusterid"],
glb_allobs_df[, glb_cluster_entropy_var]))
cluster_df <- subset(cluster_df, Freq > 0)
names(cluster_df)[c(1, 2, 3)] <- c(glb_category_var, ".clusterid",
glb_cluster_entropy_var)
# spread(unite(cluster_df, prdline.my.clusterid, prdline.my, .clusterid),
# sold.fctr, Freq)
cluster_df <- do.call(tidyr::unite,
list(cluster_df, paste0(glb_category_var, ".clusterid"),
grep(glb_category_var, names(cluster_df)),
grep(".clusterid", names(cluster_df))))
cluster_df <- do.call(tidyr::spread,
list(cluster_df, glb_cluster_entropy_var, "Freq"))
cluster_df[is.na(cluster_df)] <- 0
tmp.entropy <- sapply(1:nrow(cluster_df),
function(row) entropy(as.numeric(cluster_df[row, -1]), method="ML"))
tmp.knt <- sapply(1:nrow(cluster_df),
function(row) sum(as.numeric(cluster_df[row, -1])))
cluster_df$.entropy <- tmp.entropy; cluster_df$.knt <- tmp.knt
print(sprintf("glb_allobs_df$%s$.clusterid Entropy: %0.4f (%0.4f pct)",
glb_category_var,
cluster_ent <- weighted.mean(cluster_df$.entropy, cluster_df$.knt),
100 * cluster_ent / category_ent))
print(cluster_df)
glb_allobs_df$.clusterid.fctr <- as.factor(glb_allobs_df$.clusterid)
glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features,
".clusterid")
glb_interaction_only_features[ifelse(grepl("\\.fctr", glb_category_var),
glb_category_var,
paste0(glb_category_var, ".fctr"))] <-
c(".clusterid.fctr")
glb_exclude_vars_as_features <- c(glb_exclude_vars_as_features,
cluster_vars)
}
## Loading required package: proxy
##
## Attaching package: 'proxy'
##
## The following objects are masked from 'package:stats':
##
## as.dist, dist
##
## The following object is masked from 'package:base':
##
## as.matrix
##
## Loading required package: dynamicTreeCut
## Loading required package: entropy
## [1] "Clustering features: "
## [1] "D.T.condit" "D.T.use" "D.T.scratch" "D.T.new"
## [5] "D.T.good" "D.T.screen" "D.T.great" "D.T.ipad"
## [9] "D.T.work" "D.T.excel" "D.T.like" "D.T.box"
## [13] "D.T.function." "D.T.item" "D.T.fulli" "D.T.cosmet"
## [17] "D.T.minor" "D.T.mint" "D.T.crack" "D.T.wear"
## [21] "D.T.perfect" "D.T.includ" "D.T.lock" "D.T.case"
## [25] "D.T.icloud" "D.T.see" "D.T.light" "D.T.devic"
## [29] "D.T.pleas" "D.T.back" "D.T.origin" "D.T.dent"
## [33] "D.T.hous" "D.T.sign" "D.T.open" "D.T.clean"
## [37] "D.T.will" "D.T.appl" "D.T.charger" "D.T.damag"
## [41] "D.T.X100" "D.T.come" "D.T.scuff" "D.T.corner"
## [45] "D.T.small" "D.T.broken" "D.T.descript" "D.T.unit"
## [49] "D.T.refurbish" "D.T.show" "D.T.shape" "D.T.read"
## [53] "D.T.test" "D.T.pictur" "D.T.bare" "D.T.brand"
## [57] "D.T.list" "D.T.may" "D.T.mark" "D.T.blemish"
## [61] "D.T.packag" "D.T.mini" "D.T.affect" "D.T.normal"
## [65] "D.T.tab" "D.T.top" "D.T.accessori" "D.T.ding"
## [69] "D.T.near" "D.T.digit" "D.T.photo" "D.T.tear"
## [73] "D.T.display" "D.T.minim" "D.T.wifi" "D.T.order"
## [77] "D.T.protector" "D.T.kept" "D.T.right" "D.T.previous"
## [81] "D.T.button" "D.T.alway" "D.T.contact" "D.T.fair"
## [85] "D.T.air" "D.T.esn" "D.T.full" "D.T.averag"
## [89] "D.T.free" "D.T.sinc" "D.T.imei" "D.T.cabl"
## [93] "D.T.seal" "D.T.profession" "D.T.overal" "D.T.retail"
## [97] "D.T.refer" "D.T.left" "D.T.stock" "D.T.two"
## [101] "D.T.detail" "D.T.bodi" "D.T.seller" "D.T.activ"
## [105] "D.T.phone" "D.T.problem" "D.T.manufactur" "D.T.side"
## [109] "D.T.certifi" "D.T.ship" "D.T.chip" "D.T.edg"
## [113] "D.T.inspect" "D.T.heavili" "D.T.keyboard" "D.T.non"
## [117] "D.T.geek" "D.T.squad" "D.T.handset" "D.T.upper"
## [121] "D.T.sticker" "D.T.scroll" "D.T.must" "D.T.contain"
## [125] "D.T.imag" "D.T.qualiti" "D.T.anoth" "D.T.pic"
## [129] "D.T.least" "D.T.correct" "D.T.featur" "D.T.technician"
## [133] "D.T.super" "D.T.expect" "D.T.sync" "D.T.speaker"
## [137] "D.T.name" "D.T.lightn" "D.T.X2016" "D.T.passcod"
## [141] "D.T.money" "D.T.els" "D.T.stylus" "D.T.corpor"
## [145] "D.T.intro" "D.T.higher" "D.T.beetl" "D.T.defens"
## [149] "D.T.disclaim" "D.T.essenti" "D.T.final" "D.T.repeat."
## [153] "D.P.http" "D.P.mini" "D.P.air" "D.P.black"
## [157] "D.P.white" "D.P.gold" "D.P.spacegray"
## [1] "glb_allobs_df Entropy: 0.6903"
## [1] "glb_allobs_df$prdl.my.descr.fctr Entropy: 0.6779 (98.1925 pct)"
## prdl.my.descr.fctr N Y .entropy .knt
## 1 Unknown#0 72 47 0.6709143 119
## 2 Unknown#1 46 33 0.6795459 79
## 3 iPad 1#0 53 69 0.6845225 122
## 4 iPad 1#1 47 56 0.6893248 103
## 5 iPad 2#0 57 80 0.6789878 137
## 6 iPad 2#1 84 67 0.6867963 151
## 7 iPad 3+#0 58 87 0.6730117 145
## 8 iPad 3+#1 108 58 0.6470729 166
## 9 iPadAir#0 125 95 0.6838206 220
## 10 iPadAir#1 78 55 0.6781190 133
## 11 iPadmini 2+#0 95 59 0.6655694 154
## 12 iPadmini 2+#1 30 21 0.6774944 51
## 13 iPadmini#0 94 79 0.6893836 173
## 14 iPadmini#1 52 54 0.6929692 106
## [1] "Category: Unknown#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 24 10024 N Unknown#0 0 0
## 24.1 10024 N Unknown#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 24 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0
## D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 24 0 0 0 0 0 0 0
## 24.1 0 0 0 0 0 0 0
## D.P.white D.P.gold D.P.spacegray
## 24 0 0 0
## 24.1 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 24 10024 N Unknown#0 0 0
## 66 10066 N Unknown#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 24 0 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 24 0 0 0 0 0 0 0
## 66 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 24 0 0 0 0 0 0
## 66 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 24 0 0 0 0 0
## 66 0 0 0 0 0
## [1] "Category: Unknown#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 5 10005 N Unknown#1
## 130 10130 Y Unknown#1
## descr.my
## 5 Please feel free to buy. All product have been thoroughly inspected, cleaned and tested to be 100%
## 130 New - Open Box. Charger included.
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 5 0 0 0 0.0000000 0 0 0
## 130 0 0 0 0.8180361 0 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 5 0 0 0 0 0.0000000 0 0
## 130 0 0 0 0 0.9188446 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 5 0.000000 0 0 0 0 0 0
## 130 1.000109 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 5 0.5309493 0 0 0 0 0 0.00000
## 130 0.0000000 0 0 0 0 0 1.18323
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 5 0.5983265 0 0 0.000000 0 0.5375583 0
## 130 0.0000000 0 0 1.225531 0 0.0000000 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 5 0 0 0 0 0.6166129 0
## 130 0 0 0 0 0.0000000 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 5 0.7127655 0 0 0 0 0
## 130 0.0000000 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 5 0 0 0 0 0 0.8205658 0
## 130 0 0 0 0 0 0.0000000 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 5 0 0 0 0 0 0
## 130 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 5 0 0 0 0 0 0 0
## 130 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 5 0 0
## 130 0 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 244 10244 N Unknown#1
## 1293 11294 N Unknown#1
## descr.my
## 244 Sync/ Charge cable included. Unit is in perfect working order with only minimal scuffs. No earbuds
## 1293 Sync/ Charge cable included. Unit is in perfect working order with only minimal scuffs. No earbuds
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 244 0 0.340566 0 0 0 0 0
## 1293 0 0.340566 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 244 0.5085657 0.4545948 0 0 0 0 0
## 1293 0.5085657 0.4545948 0 0 0 0 0
## D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 244 0 0.5439332 0 0 0 0
## 1293 0 0.5439332 0 0 0 0
## D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 244 0.5503322 0 0 0 0 0
## 1293 0.5503322 0 0 0 0 0
## D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.packag D.T.mini D.T.affect D.T.normal D.T.tab D.T.top
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 244 0 0.5971116 0 0.6348423 0 0
## 1293 0 0.5971116 0 0.6348423 0 0
## D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 244 0.6550598 0 0 0 0 0
## 1293 0.6550598 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 244 0 0 0 0 0 0 0
## 1293 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 244 0 0 0 0 0 0.8230595
## 1293 0 0 0 0 0 0.8230595
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 244 0 0 0 0 0 0
## 1293 0 0 0 0 0 0
## [1] "Category: iPad 1#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 9 10009 Y iPad 1#0 0 0
## 9.1 10009 Y iPad 1#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 9 0 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 9 0 0 0 0 0 0
## 9.1 0 0 0 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 9 10009 Y iPad 1#0 0 0
## 12 10012 N iPad 1#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 9 0 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 9 0 0 0 0 0 0 0
## 12 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 9 0 0 0 0 0 0
## 12 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 9 0 0 0 0 0
## 12 0 0 0 0 0
## [1] "Category: iPad 1#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 13 10013 Y iPad 1#1
## 68 10068 Y iPad 1#1
## descr.my
## 13 GOOD CONDITION. CLEAN ICLOUD. NO LOCKS. CLEAN IMEI. This tablet has been fully tested and works
## 68 14 Days Warranty. Product has some sign of wear and scratches from previous use. (Please see
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 13 0.2193351 0.0000000 0.0000000 0 0.3412301 0 0
## 68 0.0000000 0.3190707 0.3215711 0 0.0000000 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 13 0 0.340566 0 0 0 0 0
## 68 0 0.000000 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 13 0.4469228 0 0 0 0 0.0000000 0
## 68 0.0000000 0 0 0 0 0.4717371 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 13 0 0.5408471 0 0.5062025 0.0000000 0 0
## 68 0 0.0000000 0 0.0000000 0.5647662 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 13 0.0000000 0 0 0 0 0.0000000 0
## 68 0.5309493 0 0 0 0 0.5108796 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 13 1.087866 0 0 0 0 0 0
## 68 0.000000 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 13 0 0 0 0 0.5605572 0 0
## 68 0 0 0 0 0.0000000 0 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 13 0 0 0 0.0000000 0 0
## 68 0 0 0 0.6852021 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 13 0 0.6625563 0 0 0 0
## 68 0 0.0000000 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 13 0 0 0 0 0 0
## 68 0 0 0 0 0 0
## D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 13 0 0 0 0 0 0 0
## 68 0 0 0 0 0 0 0
## D.P.spacegray
## 13 0
## 68 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 1046 11046 N iPad 1#1
## 2371 12373 <NA> iPad 1#1
## descr.my
## 1046 This TAB is in average condition with some scratches on the housing or screen (does not affect
## 2371 This TAB is in average condition with some scratches on the housing or screen (does not affect
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 1046 0.3446695 0 0.4593873 0 0 0.5201247 0
## 2371 0.3446695 0 0.4593873 0 0 0.5201247 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 1046 0 0 0 0 0 0.7436654 0
## 2371 0 0 0 0 0 0.7436654 0
## D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.packag D.T.mini D.T.affect D.T.normal D.T.tab D.T.top
## 1046 0 0 0.9383182 0 0.9458136 0
## 2371 0 0 0.9383182 0 0.9458136 0
## D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 1046 0 0 0 1.066956 0 0 0
## 2371 0 0 0 1.066956 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 1046 0 0 0 0 0 0 0
## 2371 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 1046 0 0 0 0 0 0
## 2371 0 0 0 0 0 0
## [1] "Category: iPad 2#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 39 10039 N iPad 2#0 0 0
## 39.1 10039 N iPad 2#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 39 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0
## D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 39 0 0 0 0 0 0 0
## 39.1 0 0 0 0 0 0 0
## D.P.white D.P.gold D.P.spacegray
## 39 0 0 0
## 39.1 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 39 10039 N iPad 2#0 0 0
## 73 10073 Y iPad 2#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 39 0 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 39 0 0 0 0 0 0 0
## 73 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 39 0 0 0 0 0 0
## 73 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 39 0 0 0 0 0
## 73 0 0 0 0 0
## [1] "Category: iPad 2#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 1 10001 N iPad 2#1
## 2 10002 Y iPad 2#1
## descr.my
## 1 iPad is in 8.5+ out of 10 cosmetic condition!
## 2 Previously used, please read description. May show signs of use such as scratches to the screen and
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 1 0.8042288 0.0000000 0.0000000 0 0 0.0000000 0
## 2 0.0000000 0.5801286 0.2923374 0 0 0.3309884 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 1 1.172534 0 0 0 0 0 0
## 2 0.000000 0 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 1 0 1.501739 0 0 0 0 0
## 2 0 0.000000 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 1 0.0000000 0 0 0 0 0.000000 0
## 2 0.4826812 0 0 0 0 0.464436 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 1 0 0 0 0 0.0000000 0
## 2 0 0 0 0 0.5755626 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 1 0 0.0000000 0 0.0000000 0 0 0
## 2 0 0.5184688 0 0.5837624 0 0 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 1 0 0 0.0000000 0 0 0 0
## 2 0 0 0.5570595 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 1 0 0 0 0.000000 0 0
## 2 0 0 0 0.622911 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.featur D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 1 0 0 0 0 0 0
## 2 0 0 0 0 0 0
## D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 1 0 0 0 0 0 0 0
## 2 0 0 0 0 0 0 0
## D.P.white D.P.gold D.P.spacegray
## 1 0 0 0
## 2 0 0 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 158 10158 N iPad 2#1
## 1196 11196 Y iPad 2#1
## descr.my
## 158 This iPad 2 is used and is in good working order. It has scuffs/ scratches from general use. Note
## 1196 This iPad 2 is used and is in good working order. It has scuffs/ scratches from general use and there
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen
## 158 0 0.6381414 0.3215711 0 0.3753531 0
## 1196 0 0.7090460 0.3573012 0 0.4170590 0
## D.T.great D.T.ipad D.T.work D.T.excel D.T.like D.T.box
## 158 0 0.3517602 0.3746226 0 0 0
## 1196 0 0.3908446 0.4162473 0 0 0
## D.T.function. D.T.item D.T.fulli D.T.cosmet D.T.minor D.T.mint
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock D.T.case
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back D.T.origin
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will D.T.appl
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 158 0 0 0 0 0.5983265 0
## 1196 0 0 0 0 0.6648072 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 158 0 0 0 0 0 0.6983265
## 1196 0 0 0 0 0 0.7759183
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 158 0 0 0 0 0 0
## 1196 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 158 0 0 0 0 0 0 0
## 1196 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 158 0 0
## 1196 0 0
## [1] "Category: iPad 3+#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 3 10003 Y iPad 3+#0 0 0
## 3.1 10003 Y iPad 3+#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 3 0 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 3 0 0 0 0 0 0
## 3.1 0 0 0 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 3 10003 Y iPad 3+#0 0 0
## 10 10010 Y iPad 3+#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 3 0 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 3 0 0 0 0 0 0 0
## 10 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 3 0 0 0 0 0 0
## 10 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 3 0 0 0 0 0
## 10 0 0 0 0 0
## [1] "Category: iPad 3+#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 11 10011 Y iPad 3+#1
## 37 10037 Y iPad 3+#1
## descr.my
## 11 good condition, minor wear and tear on body some light scratches on screen. functions great.
## 37 Rarely ever used it.
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 11 0.2193351 0.000000 0.2923374 0 0.3412301 0.3309884 0.4008907
## 37 0.0000000 1.063569 0.0000000 0 0.0000000 0.0000000 0.0000000
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 11 0 0 0 0 0 0.410691 0
## 37 0 0 0 0 0 0.000000 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 11 0 0 0.4095653 0 0 0.4288519 0
## 37 0 0 0.0000000 0 0 0.0000000 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 11 0 0 0 0 0 0.4577939 0
## 37 0 0 0 0 0 0.0000000 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 11 0 0 0.5837624 0 0 0 0
## 37 0 0 0.0000000 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 11 0 0 0 0 0 0 0.7459689
## 37 0 0 0 0 0 0 0.0000000
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 11 0 0 0 0 0 0
## 37 0 0 0 0 0 0
## D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 11 0 0 0 0 0 0 0
## 37 0 0 0 0 0 0 0
## D.P.spacegray
## 11 0
## 37 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 17 10017 Y iPad 3+#1
## 146 10146 N iPad 3+#1
## descr.my
## 17 Great working iPad. Very minor surface scratches on back as pictured. Other very light scratching
## 146 Great working iPad. Minor surface scratches on back as pictured. Other very light scratching which
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 17 0 0 0.6431422 0 0 0 0.4409798
## 146 0 0 0.6431422 0 0 0 0.4409798
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 17 0.3517602 0.3746226 0 0 0 0 0
## 146 0.3517602 0.3746226 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 17 0 0 0.4505218 0 0 0 0
## 146 0 0 0.4505218 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 17 0 0 0 0 0 0.5035733 0
## 146 0 0 0 0 0 0.5035733 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 17 0 0.5108796 0 0 0 0 0
## 146 0 0.5108796 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 17 0 0 0 0 0 0.6331188 0
## 146 0 0 0 0 0 0.6331188 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 17 0 0 0 0 0 0
## 146 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 17 0 0 0 0 0 0 0
## 146 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 17 0 0
## 146 0 0
## [1] "Category: iPadAir#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 16 10016 N iPadAir#0 0 0
## 16.1 10016 N iPadAir#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 16 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0
## D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 16 0 0 0 0 0 0 0
## 16.1 0 0 0 0 0 0 0
## D.P.white D.P.gold D.P.spacegray
## 16 0 0 0
## 16.1 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 16 10016 N iPadAir#0 0 0
## 19 10019 Y iPadAir#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 16 0 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 16 0 0 0 0 0 0 0
## 19 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 16 0 0 0 0 0 0
## 19 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 16 0 0 0 0 0
## 19 0 0 0 0 0
## [1] "Category: iPadAir#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 30 10030 Y iPadAir#1
## 33 10033 N iPadAir#1
## descr.my
## 30 Comes with USB Cable and wall adapter. May have minor dings or scuffs.
## 33 We are selling good quality iPads that have been fully tested by an Apple Certified Technician. The
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 30 0 0 0 0 0.000000 0 0
## 33 0 0 0 0 0.417059 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 30 0.0000000 0 0 0 0 0 0
## 33 0.3908446 0 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 30 0.000000 0 0.5005798 0 0 0 0
## 33 0.546239 0 0.0000000 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 30 0 0 0.0000000 0 0 0 0.6215803
## 33 0 0 0.6103266 0 0 0 0.0000000
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 30 0.6648072 0 0 0 0 0
## 33 0.0000000 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 30 0 0 0 0 0.0000000 0 0
## 33 0 0 0 0 0.6851255 0 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 30 0 0 0.6808506 0 0 0 0
## 33 0 0 0.0000000 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 30 0 0 0 0 0 0.7479696 0
## 33 0 0 0 0 0 0.0000000 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 30 0 0 0 0 0 0
## 33 0 0 0 0 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 30 0 0 0.8006286 0 0 0
## 33 0 0 0.0000000 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 30 0 0 0 0 0 0
## 33 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 30 0.0000000 0 0 0 0 0
## 33 0.8948505 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 30 0 0 0 0 0 0
## 33 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 30 0 0 0 0 0 0.0000000
## 33 0 0 0 0 0 0.9767356
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 30 0 0 0 0 0 0.000000
## 33 0 0 0 0 0 1.005962
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 30 0 0 0 0 0 0
## 33 0 0 0 0 0 0
## D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 30 0 0 0 0 0 0
## 33 0 0 0 0 0 0
## D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 30 0 0 0 0 0 0 0
## 33 0 0 0 0 0 0 0
## D.P.spacegray
## 30 0
## 33 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit
## 1322 11323 Y iPadAir#1 Great Condition 1.206343
## 2337 12339 <NA> iPadAir#1 great condition 1.206343
## D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad
## 1322 0 0 0 0 0 2.204899 0
## 2337 0 0 0 0 0 2.204899 0
## D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim
## 1322 0 0 0 0 0 0
## 2337 0 0 0 0 0 0
## D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black
## 1322 0 0 0 0 0 0 0
## 2337 0 0 0 0 0 0 0
## D.P.white D.P.gold D.P.spacegray
## 1322 0 0 0
## 2337 0 0 0
## [1] "Category: iPadmini 2+#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 4 10004 N iPadmini 2+#0 0 0
## 4.1 10004 N iPadmini 2+#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 4 0 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 4 0 0 0 0 0 0
## 4.1 0 0 0 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 4 10004 N iPadmini 2+#0 0 0
## 6 10006 Y iPadmini 2+#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair D.T.air
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal
## 4 0 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0 0
## D.T.profession D.T.overal D.T.retail D.T.refer D.T.left D.T.stock
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.manufactur D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek D.T.squad
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must D.T.contain
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.technician D.T.super D.T.expect D.T.sync D.T.speaker D.T.name
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 4 0 0 0 0 0 0
## 6 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 4 0 0 0 0 0 0 0
## 6 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 4 0 0
## 6 0 0
## [1] "Category: iPadmini 2+#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 18 10018 N iPadmini 2+#1
## 101 10101 Y iPadmini 2+#1
## descr.my
## 18 We are selling good quality iPads that have been fully tested by an Apple Certified Technician. The
## 101 This item is in Excellent cosmetic condition. It will not have any scratches on the screen. It may
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 18 0.0000000 0 0.0000000 0 0.417059 0.0000000 0
## 101 0.3015858 0 0.4019639 0 0.000000 0.4551091 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 18 0.3908446 0 0.0000000 0 0 0 0.0000000
## 101 0.0000000 0 0.5914658 0 0 0 0.5455444
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 18 0.546239 0.0000000 0 0 0 0 0
## 101 0.000000 0.5631522 0 0 0 0 0
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 18 0 0.0000000 0.6103266 0 0 0 0
## 101 0 0.7612725 0.0000000 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 18 0 0 0 0 0.6851255 0
## 101 0 0 0 0 0.0000000 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 18 0 0 0 0.0000000 0 0 0
## 101 0 0 0 0.7659569 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 18 0.8948505 0 0 0 0 0
## 101 0.0000000 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 18 0 0 0 0 0 0.9767356
## 101 0 0 0 0 0 0.0000000
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 18 0 0 0 0 0 1.005962
## 101 0 0 0 0 0 0.000000
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 18 0 0 0 0 0 0
## 101 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 18 0 0 0 0 0 0 0
## 101 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 18 0 0
## 101 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my
## 2152 12154 <NA> iPadmini 2+#1 Only Opened box but is new
## 2443 12445 <NA> iPadmini 2+#1 New Opened Box powered up only.
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 2152 0 0 0 1.363393 0 0 0
## 2443 0 0 0 1.022545 0 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 2152 0 0 0 0 1.531408 0 0
## 2443 0 0 0 0 1.148556 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 2152 1.972050 0 0 0 0 0 0
## 2443 1.479038 0 0 0 0 0 0
## D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.pictur D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.packag D.T.mini D.T.affect D.T.normal D.T.tab D.T.top
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo D.T.tear
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.display D.T.minim D.T.wifi D.T.order D.T.protector D.T.kept
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.right D.T.previous D.T.button D.T.alway D.T.contact D.T.fair
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 2152 0 0 0 0 0 0 0
## 2443 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 2152 0 0 0 0 0 0
## 2443 0 0 0 0 0 0
## [1] "Category: iPadmini#0"
## [1] "max distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 7 10007 Y iPadmini#0 0 0
## 7.1 10007 Y iPadmini#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.geek D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.must D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 7 0 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.els D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.T.defens D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## D.P.mini D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 7 0 0 0 0 0 0
## 7.1 0 0 0 0 0 0
## [1] "min distance(0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr descr.my D.T.condit D.T.use
## 7 10007 Y iPadmini#0 0 0
## 57 10057 N iPadmini#0 0 0
## D.T.scratch D.T.new D.T.good D.T.screen D.T.great D.T.ipad D.T.work
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.excel D.T.like D.T.box D.T.function. D.T.item D.T.fulli D.T.cosmet
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect D.T.includ D.T.lock
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.case D.T.icloud D.T.see D.T.light D.T.devic D.T.pleas D.T.back
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.origin D.T.dent D.T.hous D.T.sign D.T.open D.T.clean D.T.will
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come D.T.scuff D.T.corner
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.small D.T.broken D.T.descript D.T.unit D.T.refurbish D.T.show
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.shape D.T.read D.T.test D.T.pictur D.T.bare D.T.brand D.T.list
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini D.T.affect D.T.normal
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.tab D.T.top D.T.accessori D.T.ding D.T.near D.T.digit D.T.photo
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.tear D.T.display D.T.minim D.T.wifi D.T.order D.T.protector
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.kept D.T.right D.T.previous D.T.button D.T.alway D.T.contact
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free D.T.sinc D.T.imei
## 7 0 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0 0
## D.T.cabl D.T.seal D.T.profession D.T.overal D.T.retail D.T.refer
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.left D.T.stock D.T.two D.T.detail D.T.bodi D.T.seller D.T.activ
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.phone D.T.problem D.T.manufactur D.T.side D.T.certifi D.T.ship
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.chip D.T.edg D.T.inspect D.T.heavili D.T.keyboard D.T.non D.T.geek
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.squad D.T.handset D.T.upper D.T.sticker D.T.scroll D.T.must
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.contain D.T.imag D.T.qualiti D.T.anoth D.T.pic D.T.least
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.correct D.T.featur D.T.technician D.T.super D.T.expect D.T.sync
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.speaker D.T.name D.T.lightn D.T.X2016 D.T.passcod D.T.money D.T.els
## 7 0 0 0 0 0 0 0
## 57 0 0 0 0 0 0 0
## D.T.stylus D.T.corpor D.T.intro D.T.higher D.T.beetl D.T.defens
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.T.disclaim D.T.essenti D.T.final D.T.repeat. D.P.http D.P.mini
## 7 0 0 0 0 0 0
## 57 0 0 0 0 0 0
## D.P.air D.P.black D.P.white D.P.gold D.P.spacegray
## 7 0 0 0 0 0
## 57 0 0 0 0 0
## [1] "Category: iPadmini#1"
## [1] "max distance(1.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 60 10060 N iPadmini#1
## 76 10076 Y iPadmini#1
## descr.my
## 60 Minor scuffs in the back. Otherwise looks flawless. See all pictures.
## 76 Works perfectly, NOT iCloud locked, 1 owner. It is in not in very good condition, but works
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 60 0.0000000 0 0 0 0.0000000 0 0
## 76 0.3015858 0 0 0 0.4691913 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 60 0 0.0000000 0 0 0 0 0
## 76 0 0.9365565 0 0 0 0 0
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear D.T.perfect
## 60 0 0 0.5631522 0 0 0 0.0000000
## 76 0 0 0.0000000 0 0 0 0.6992778
## D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light D.T.devic
## 60 0 0.0000000 0 0.0000000 0.7059578 0 0
## 76 0 0.7436647 0 0.6960284 0.0000000 0 0
## D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign D.T.open
## 60 0 0.6385995 0 0 0 0 0
## 76 0 0.0000000 0 0 0 0 0
## D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100 D.T.come
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript D.T.unit
## 60 0.7479081 0 0 0 0 0
## 76 0.0000000 0 0 0 0 0
## D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur D.T.bare
## 60 0 0 0 0 0 0.7913985 0
## 76 0 0 0 0 0 0.0000000 0
## D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag D.T.mini
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding D.T.near
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi D.T.order
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.protector D.T.kept D.T.right D.T.previous D.T.button D.T.alway
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag D.T.free
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession D.T.overal
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail D.T.bodi
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur D.T.side
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn D.T.X2016
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor D.T.intro
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti D.T.final
## 60 0 0 0 0 0 0
## 76 0 0 0 0 0 0
## D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white D.P.gold
## 60 0 0 0 0 0 0 0
## 76 0 0 0 0 0 0 0
## D.P.spacegray
## 60 0
## 76 0
## [1] "min distance(-0.0000) pair:"
## UniqueID sold.fctr prdl.my.descr.fctr
## 335 10335 N iPadmini#1
## 663 10663 N iPadmini#1
## descr.my
## 335 This item is used but well taken care of no cosmetic wears at all . Any other questions feel free to
## 663 This item is used but well taken care of no cosmetic wears at all . Any other questions feel free to
## D.T.condit D.T.use D.T.scratch D.T.new D.T.good D.T.screen D.T.great
## 335 0 0.3190707 0 0 0 0 0
## 663 0 0.3190707 0 0 0 0 0
## D.T.ipad D.T.work D.T.excel D.T.like D.T.box D.T.function. D.T.item
## 335 0 0 0 0 0 0 0.4364355
## 663 0 0 0 0 0 0 0.4364355
## D.T.fulli D.T.cosmet D.T.minor D.T.mint D.T.crack D.T.wear
## 335 0 0.4505218 0 0 0 0.4717371
## 663 0 0.4505218 0 0 0 0.4717371
## D.T.perfect D.T.includ D.T.lock D.T.case D.T.icloud D.T.see D.T.light
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.devic D.T.pleas D.T.back D.T.origin D.T.dent D.T.hous D.T.sign
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.open D.T.clean D.T.will D.T.appl D.T.charger D.T.damag D.T.X100
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.come D.T.scuff D.T.corner D.T.small D.T.broken D.T.descript
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.unit D.T.refurbish D.T.show D.T.shape D.T.read D.T.test D.T.pictur
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.bare D.T.brand D.T.list D.T.may D.T.mark D.T.blemish D.T.packag
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.mini D.T.affect D.T.normal D.T.tab D.T.top D.T.accessori D.T.ding
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.near D.T.digit D.T.photo D.T.tear D.T.display D.T.minim D.T.wifi
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.order D.T.protector D.T.kept D.T.right D.T.previous D.T.button
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.alway D.T.contact D.T.fair D.T.air D.T.esn D.T.full D.T.averag
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.free D.T.sinc D.T.imei D.T.cabl D.T.seal D.T.profession
## 335 0.7127655 0 0 0 0 0
## 663 0.7127655 0 0 0 0 0
## D.T.overal D.T.retail D.T.refer D.T.left D.T.stock D.T.two D.T.detail
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.bodi D.T.seller D.T.activ D.T.phone D.T.problem D.T.manufactur
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.side D.T.certifi D.T.ship D.T.chip D.T.edg D.T.inspect D.T.heavili
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.T.keyboard D.T.non D.T.geek D.T.squad D.T.handset D.T.upper
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.sticker D.T.scroll D.T.must D.T.contain D.T.imag D.T.qualiti
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.anoth D.T.pic D.T.least D.T.correct D.T.featur D.T.technician
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.super D.T.expect D.T.sync D.T.speaker D.T.name D.T.lightn
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.X2016 D.T.passcod D.T.money D.T.els D.T.stylus D.T.corpor
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.intro D.T.higher D.T.beetl D.T.defens D.T.disclaim D.T.essenti
## 335 0 0 0 0 0 0
## 663 0 0 0 0 0 0
## D.T.final D.T.repeat. D.P.http D.P.mini D.P.air D.P.black D.P.white
## 335 0 0 0 0 0 0 0
## 663 0 0 0 0 0 0 0
## D.P.gold D.P.spacegray
## 335 0 0
## 663 0 0
## [1] "glb_allobs_df$prdl.my.descr.fctr$.clusterid Entropy: 0.6570 (96.9282 pct)"
## prdl.my.descr.fctr.clusterid N Y .entropy .knt
## 1 Unknown#0_1 72 47 0.6709143 119
## 2 Unknown#1_1 25 13 0.6424220 38
## 3 Unknown#1_2 17 9 0.6450332 26
## 4 Unknown#1_3 4 5 0.6869616 9
## 5 Unknown#1_4 0 6 0.0000000 6
## 6 iPad 1#0_1 53 69 0.6845225 122
## 7 iPad 1#1_1 12 18 0.6730117 30
## 8 iPad 1#1_2 13 16 0.6877868 29
## 9 iPad 1#1_3 12 7 0.6581100 19
## 10 iPad 1#1_4 8 7 0.6909233 15
## 11 iPad 1#1_5 2 8 0.5004024 10
## 12 iPad 2#0_1 57 80 0.6789878 137
## 13 iPad 2#1_1 31 24 0.6850260 55
## 14 iPad 2#1_2 10 14 0.6791933 24
## 15 iPad 2#1_3 19 5 0.5117401 24
## 16 iPad 2#1_4 9 11 0.6881388 20
## 17 iPad 2#1_5 8 8 0.6931472 16
## 18 iPad 2#1_6 7 5 0.6791933 12
## 19 iPad 3+#0_1 58 87 0.6730117 145
## 20 iPad 3+#1_1 26 20 0.6846163 46
## 21 iPad 3+#1_2 25 14 0.6528258 39
## 22 iPad 3+#1_3 14 6 0.6108643 20
## 23 iPad 3+#1_4 16 2 0.3488321 18
## 24 iPad 3+#1_5 9 11 0.6881388 20
## 25 iPad 3+#1_6 5 5 0.6931472 10
## 26 iPad 3+#1_7 13 0 0.0000000 13
## 27 iPadAir#0_1 125 95 0.6838206 220
## 28 iPadAir#1_1 30 25 0.6890092 55
## 29 iPadAir#1_2 19 18 0.6927819 37
## 30 iPadAir#1_3 13 6 0.6236548 19
## 31 iPadAir#1_4 13 4 0.5455946 17
## 32 iPadAir#1_5 3 2 0.6730117 5
## 33 iPadmini 2+#0_1 95 59 0.6655694 154
## 34 iPadmini 2+#1_1 12 8 0.6730117 20
## 35 iPadmini 2+#1_2 7 12 0.6581100 19
## 36 iPadmini 2+#1_3 5 1 0.4505612 6
## 37 iPadmini 2+#1_4 6 0 0.0000000 6
## 38 iPadmini#0_1 94 79 0.6893836 173
## 39 iPadmini#1_1 13 9 0.6765260 22
## 40 iPadmini#1_2 13 7 0.6474466 20
## 41 iPadmini#1_3 8 7 0.6909233 15
## 42 iPadmini#1_4 2 11 0.4293230 13
## 43 iPadmini#1_5 5 8 0.6662784 13
## 44 iPadmini#1_6 6 9 0.6730117 15
## 45 iPadmini#1_7 5 3 0.6615632 8
# Last call for data modifications
#stop(here") # sav_allobs_df <- glb_allobs_df
# glb_allobs_df[(glb_allobs_df$PropR == 0.75) & (glb_allobs_df$State == "Hawaii"), "PropR.fctr"] <- "N"
# Re-partition: rebuild the training and new-observation frames from the
# combined frame, keyed on the .src marker column.
# which() keeps only rows where the comparison is TRUE (NA comparisons are
# dropped), which is the same row-selection rule subset() applies.
glb_trnobs_df <- glb_allobs_df[which(glb_allobs_df[[".src"]] == "Train"), , drop = FALSE]
glb_newobs_df <- glb_allobs_df[which(glb_allobs_df[[".src"]] == "Test"), , drop = FALSE]

# Register the "select.features" step in the chunk log as a new major step.
glb_chunks_df <- myadd_chunk(glb_chunks_df,
                             "select.features",
                             major.inc = TRUE)
## label step_major step_minor bgn end elapsed
## 7 manage.missing.data 4 1 54.646 60.319 5.673
## 8 select.features 5 0 60.320 NA NA
5.0: select features
#stop(here"); sav_allobs_df <- glb_allobs_df; glb_allobs_df <- sav_allobs_df
# Build the feature table from the training observations with the project
# helper myselect_features, keep it in glb_feats_df, then display it.
glb_feats_df <- myselect_features(
    entity_df = glb_trnobs_df,
    exclude_vars_as_features = glb_exclude_vars_as_features,
    rsp_var = glb_rsp_var
)
print(glb_feats_df)
## Warning in cor(data.matrix(entity_df[, sel_feats]), y =
## as.numeric(entity_df[, : the standard deviation is zero
## id cor.y
## sold sold 1.0000000000
## biddable biddable 0.5481788380
## startprice.log startprice.log -0.4674275376
## startprice startprice -0.4569767211
## startprice.predict. startprice.predict. -0.3573141534
## startprice.diff startprice.diff -0.2794223471
## UniqueID UniqueID -0.1895466260
## idseq.my idseq.my -0.1895466260
## condition.fctr condition.fctr -0.1535490071
## D.T.hous D.T.hous -0.1373919817
## D.npnct05.log D.npnct05.log -0.1180558939
## D.T.X100 D.T.X100 -0.1150127028
## D.T.near D.T.near -0.0929819941
## D.T.list D.T.list -0.0870905528
## D.T.fair D.T.fair -0.0802848689
## D.terms.n.post.stop D.terms.n.post.stop -0.0800729927
## D.terms.n.post.stem D.terms.n.post.stem -0.0798677390
## D.npnct14.log D.npnct14.log -0.0786203827
## D.T.cosmet D.T.cosmet -0.0777513602
## cellular.fctr cellular.fctr -0.0743297381
## D.T.profession D.T.profession -0.0712586605
## D.T.tab D.T.tab -0.0707242028
## D.terms.n.post.stop.log D.terms.n.post.stop.log -0.0638651730
## D.terms.n.post.stem.log D.terms.n.post.stem.log -0.0638431167
## D.nwrds.unq.log D.nwrds.unq.log -0.0638431167
## D.ndgts.log D.ndgts.log -0.0628684727
## D.T.overal D.T.overal -0.0621057222
## D.npnct09.log D.npnct09.log -0.0618253281
## D.T.mint D.T.mint -0.0610303678
## D.T.stock D.T.stock -0.0607284075
## carrier.fctr carrier.fctr -0.0599089237
## D.T.alway D.T.alway 0.0593444093
## D.npnct12.log D.npnct12.log -0.0593256462
## D.nwrds.log D.nwrds.log -0.0588147403
## D.T.test D.T.test -0.0586208334
## D.T.seller D.T.seller -0.0584897212
## D.T.inspect D.T.inspect -0.0568460093
## D.T.affect D.T.affect -0.0566799690
## D.nchrs.log D.nchrs.log -0.0565357348
## D.T.box D.T.box -0.0563485768
## D.T.like D.T.like -0.0557801451
## D.T.averag D.T.averag -0.0555976359
## D.T.descript D.T.descript 0.0553987246
## D.nuppr.log D.nuppr.log -0.0553358386
## D.ratio.nstopwrds.nwrds D.ratio.nstopwrds.nwrds 0.0537832223
## D.T.phone D.T.phone -0.0527118662
## D.T.origin D.T.origin -0.0525252573
## D.T.left D.T.left 0.0525031466
## D.npnct28.log D.npnct28.log -0.0524583244
## D.T.esn D.T.esn -0.0517020813
## D.T.bare D.T.bare -0.0509186819
## D.T.perfect D.T.perfect -0.0504871511
## D.T.devic D.T.devic -0.0504727874
## D.T.refer D.T.refer 0.0503000028
## D.T.least D.T.least 0.0500485566
## D.npnct06.log D.npnct06.log -0.0499761958
## D.T.wifi D.T.wifi -0.0499453504
## D.T.handset D.T.handset 0.0486468119
## D.npnct15.log D.npnct15.log 0.0484022793
## D.T.minor D.T.minor -0.0483597041
## D.T.ship D.T.ship -0.0483492299
## D.T.free D.T.free -0.0478266395
## D.nstopwrds.log D.nstopwrds.log -0.0474681704
## D.npnct24.log D.npnct24.log -0.0458449965
## D.T.previous D.T.previous 0.0453194378
## D.npnct16.log D.npnct16.log -0.0449403962
## D.T.refurbish D.T.refurbish -0.0449149382
## D.T.two D.T.two 0.0447161329
## D.T.top D.T.top 0.0433671354
## D.T.technician D.T.technician -0.0430848435
## D.T.sync D.T.sync -0.0430848435
## D.T.condit D.T.condit -0.0418798096
## prdline.my.fctr prdline.my.fctr -0.0415814340
## D.T.sign D.T.sign 0.0412800974
## D.T.function. D.T.function. 0.0397438087
## D.T.non D.T.non 0.0397064496
## D.npnct08.log D.npnct08.log -0.0396513123
## D.T.heavili D.T.heavili -0.0391978700
## color.fctr color.fctr -0.0391372902
## D.T.certifi D.T.certifi -0.0385931627
## D.T.broken D.T.broken 0.0380340254
## D.npnct13.log D.npnct13.log -0.0373463069
## D.T.correct D.T.correct -0.0373025158
## D.T.featur D.T.featur -0.0373025158
## D.T.new D.T.new -0.0372353149
## prdl.my.descr.fctr prdl.my.descr.fctr -0.0358461598
## D.T.contain D.T.contain -0.0355839439
## D.T.corpor D.T.corpor 0.0353706112
## D.T.name D.T.name 0.0352663148
## D.T.will D.T.will -0.0350451461
## D.T.scroll D.T.scroll 0.0339028710
## D.T.button D.T.button -0.0338831937
## D.T.crack D.T.crack 0.0337091183
## D.T.imag D.T.imag 0.0335537013
## D.T.minim D.T.minim -0.0330865270
## D.TfIdf.sum.post.stem D.TfIdf.sum.post.stem -0.0323742743
## D.sum.TfIdf D.sum.TfIdf -0.0323742743
## .clusterid .clusterid -0.0318360231
## .clusterid.fctr .clusterid.fctr -0.0318360231
## D.T.imei D.T.imei -0.0308865534
## D.T.display D.T.display -0.0307725689
## D.P.gold D.P.gold -0.0304491748
## D.TfIdf.sum.post.stop D.TfIdf.sum.post.stop -0.0303366192
## D.T.passcod D.T.passcod -0.0303239925
## D.T.charger D.T.charger 0.0301602937
## D.T.cabl D.T.cabl -0.0296202577
## D.T.pic D.T.pic -0.0292106355
## D.T.blemish D.T.blemish -0.0291642840
## D.T.shape D.T.shape 0.0288373334
## D.T.appl D.T.appl -0.0287074081
## D.T.back D.T.back 0.0272103292
## D.T.pictur D.T.pictur 0.0271676520
## D.T.excel D.T.excel 0.0265819659
## D.T.dent D.T.dent 0.0264976918
## D.T.digit D.T.digit 0.0261375762
## D.npnct03.log D.npnct03.log 0.0257637868
## D.T.bodi D.T.bodi -0.0252978602
## D.T.qualiti D.T.qualiti -0.0252899986
## D.T.super D.T.super 0.0250040676
## D.T.els D.T.els 0.0250040676
## D.T.disclaim D.T.disclaim 0.0250040676
## D.T.essenti D.T.essenti 0.0250040676
## D.T.repeat. D.T.repeat. 0.0250040676
## D.npnct07.log D.npnct07.log 0.0250040676
## D.T.light D.T.light -0.0249083615
## D.T.fulli D.T.fulli 0.0243976808
## D.npnct10.log D.npnct10.log -0.0241015016
## D.T.screen D.T.screen 0.0232373651
## D.T.unit D.T.unit -0.0231226779
## D.T.read D.T.read -0.0226911615
## D.T.may D.T.may 0.0225762388
## D.T.contact D.T.contact 0.0218134520
## D.T.money D.T.money -0.0215250231
## D.T.higher D.T.higher -0.0215250231
## D.T.beetl D.T.beetl -0.0215250231
## D.T.defens D.T.defens -0.0215250231
## D.T.final D.T.final -0.0215250231
## D.npnct18.log D.npnct18.log -0.0215250231
## D.T.kept D.T.kept 0.0207141990
## D.T.tear D.T.tear 0.0205481770
## D.T.open D.T.open -0.0193952625
## D.npnct11.log D.npnct11.log -0.0192035548
## D.T.order D.T.order -0.0188854872
## D.P.white D.P.white 0.0184898845
## D.T.pleas D.T.pleas 0.0178751143
## D.T.ipad D.T.ipad -0.0177348755
## D.terms.n.stem.stop.Ratio D.terms.n.stem.stop.Ratio 0.0175790908
## D.T.lock D.T.lock 0.0173034228
## D.T.activ D.T.activ -0.0166891768
## D.T.anoth D.T.anoth 0.0164535903
## D.T.damag D.T.damag -0.0160662651
## D.T.ding D.T.ding 0.0155885916
## D.T.full D.T.full 0.0149589509
## D.T.work D.T.work -0.0145594907
## D.T.detail D.T.detail -0.0139118798
## D.T.item D.T.item -0.0134922662
## D.T.stylus D.T.stylus -0.0125154705
## D.T.packag D.T.packag 0.0124597147
## storage.fctr storage.fctr -0.0116754969
## D.T.edg D.T.edg 0.0114844118
## D.T.must D.T.must 0.0113915486
## D.P.mini D.P.mini -0.0112418293
## D.T.photo D.T.photo 0.0110333858
## D.T.problem D.T.problem 0.0107375772
## D.T.seal D.T.seal 0.0106898740
## D.T.come D.T.come -0.0104488093
## D.T.corner D.T.corner -0.0104287544
## D.T.brand D.T.brand -0.0103861855
## D.T.use D.T.use 0.0103720246
## D.T.scuff D.T.scuff 0.0101340501
## D.T.lightn D.T.lightn -0.0099034064
## D.T.speaker D.T.speaker 0.0096402551
## D.ratio.sum.TfIdf.nwrds D.ratio.sum.TfIdf.nwrds 0.0096247411
## D.P.air D.P.air -0.0092629952
## D.T.includ D.T.includ -0.0091767476
## D.T.side D.T.side 0.0089049983
## D.T.mark D.T.mark -0.0088438689
## D.T.scratch D.T.scratch -0.0088060862
## D.T.icloud D.T.icloud 0.0086539687
## D.T.keyboard D.T.keyboard 0.0082735718
## D.T.right D.T.right -0.0080547459
## D.T.upper D.T.upper 0.0078374765
## D.T.manufactur D.T.manufactur 0.0077942218
## D.T.mini D.T.mini -0.0075528886
## D.T.sinc D.T.sinc 0.0072330260
## D.T.great D.T.great 0.0070063865
## D.T.chip D.T.chip -0.0067464224
## D.T.geek D.T.geek -0.0064074827
## D.T.squad D.T.squad -0.0064074827
## D.T.protector D.T.protector 0.0057850197
## D.T.case D.T.case 0.0057562564
## D.T.wear D.T.wear -0.0048789708
## D.T.sticker D.T.sticker 0.0042625126
## D.T.retail D.T.retail -0.0042217335
## D.T.see D.T.see 0.0041433566
## D.npnct01.log D.npnct01.log 0.0041255300
## D.P.spacegray D.P.spacegray 0.0034818565
## D.T.air D.T.air -0.0029579942
## D.T.clean D.T.clean 0.0025337892
## D.T.normal D.T.normal 0.0019081337
## D.TfIdf.sum.stem.stop.Ratio D.TfIdf.sum.stem.stop.Ratio -0.0014568383
## .rnorm .rnorm -0.0014350110
## D.P.black D.P.black -0.0012485463
## D.T.small D.T.small -0.0010738542
## D.T.accessori D.T.accessori 0.0007963083
## D.T.X2016 D.T.X2016 -0.0005289068
## D.T.good D.T.good -0.0004368629
## D.T.show D.T.show -0.0003156554
## D.T.expect D.T.expect NA
## D.T.intro D.T.intro NA
## D.npnct02.log D.npnct02.log NA
## D.npnct04.log D.npnct04.log NA
## D.npnct17.log D.npnct17.log NA
## D.npnct19.log D.npnct19.log NA
## D.npnct20.log D.npnct20.log NA
## D.npnct21.log D.npnct21.log NA
## D.npnct22.log D.npnct22.log NA
## D.npnct23.log D.npnct23.log NA
## D.npnct25.log D.npnct25.log NA
## D.npnct26.log D.npnct26.log NA
## D.npnct27.log D.npnct27.log NA
## D.npnct29.log D.npnct29.log NA
## D.npnct30.log D.npnct30.log NA
## D.P.http D.P.http NA
## exclude.as.feat cor.y.abs
## sold 1 1.0000000000
## biddable 0 0.5481788380
## startprice.log 1 0.4674275376
## startprice 1 0.4569767211
## startprice.predict. 1 0.3573141534
## startprice.diff 0 0.2794223471
## UniqueID 1 0.1895466260
## idseq.my 0 0.1895466260
## condition.fctr 0 0.1535490071
## D.T.hous 1 0.1373919817
## D.npnct05.log 0 0.1180558939
## D.T.X100 1 0.1150127028
## D.T.near 1 0.0929819941
## D.T.list 1 0.0870905528
## D.T.fair 1 0.0802848689
## D.terms.n.post.stop 0 0.0800729927
## D.terms.n.post.stem 0 0.0798677390
## D.npnct14.log 0 0.0786203827
## D.T.cosmet 1 0.0777513602
## cellular.fctr 0 0.0743297381
## D.T.profession 1 0.0712586605
## D.T.tab 1 0.0707242028
## D.terms.n.post.stop.log 0 0.0638651730
## D.terms.n.post.stem.log 0 0.0638431167
## D.nwrds.unq.log 0 0.0638431167
## D.ndgts.log 0 0.0628684727
## D.T.overal 1 0.0621057222
## D.npnct09.log 0 0.0618253281
## D.T.mint 1 0.0610303678
## D.T.stock 1 0.0607284075
## carrier.fctr 0 0.0599089237
## D.T.alway 1 0.0593444093
## D.npnct12.log 0 0.0593256462
## D.nwrds.log 0 0.0588147403
## D.T.test 1 0.0586208334
## D.T.seller 1 0.0584897212
## D.T.inspect 1 0.0568460093
## D.T.affect 1 0.0566799690
## D.nchrs.log 0 0.0565357348
## D.T.box 1 0.0563485768
## D.T.like 1 0.0557801451
## D.T.averag 1 0.0555976359
## D.T.descript 1 0.0553987246
## D.nuppr.log 0 0.0553358386
## D.ratio.nstopwrds.nwrds 0 0.0537832223
## D.T.phone 1 0.0527118662
## D.T.origin 1 0.0525252573
## D.T.left 1 0.0525031466
## D.npnct28.log 0 0.0524583244
## D.T.esn 1 0.0517020813
## D.T.bare 1 0.0509186819
## D.T.perfect 1 0.0504871511
## D.T.devic 1 0.0504727874
## D.T.refer 1 0.0503000028
## D.T.least 1 0.0500485566
## D.npnct06.log 0 0.0499761958
## D.T.wifi 1 0.0499453504
## D.T.handset 1 0.0486468119
## D.npnct15.log 0 0.0484022793
## D.T.minor 1 0.0483597041
## D.T.ship 1 0.0483492299
## D.T.free 1 0.0478266395
## D.nstopwrds.log 0 0.0474681704
## D.npnct24.log 0 0.0458449965
## D.T.previous 1 0.0453194378
## D.npnct16.log 0 0.0449403962
## D.T.refurbish 1 0.0449149382
## D.T.two 1 0.0447161329
## D.T.top 1 0.0433671354
## D.T.technician 1 0.0430848435
## D.T.sync 1 0.0430848435
## D.T.condit 1 0.0418798096
## prdline.my.fctr 1 0.0415814340
## D.T.sign 1 0.0412800974
## D.T.function. 1 0.0397438087
## D.T.non 1 0.0397064496
## D.npnct08.log 0 0.0396513123
## D.T.heavili 1 0.0391978700
## color.fctr 0 0.0391372902
## D.T.certifi 1 0.0385931627
## D.T.broken 1 0.0380340254
## D.npnct13.log 0 0.0373463069
## D.T.correct 1 0.0373025158
## D.T.featur 1 0.0373025158
## D.T.new 1 0.0372353149
## prdl.my.descr.fctr 0 0.0358461598
## D.T.contain 1 0.0355839439
## D.T.corpor 1 0.0353706112
## D.T.name 1 0.0352663148
## D.T.will 1 0.0350451461
## D.T.scroll 1 0.0339028710
## D.T.button 1 0.0338831937
## D.T.crack 1 0.0337091183
## D.T.imag 1 0.0335537013
## D.T.minim 1 0.0330865270
## D.TfIdf.sum.post.stem 0 0.0323742743
## D.sum.TfIdf 0 0.0323742743
## .clusterid 1 0.0318360231
## .clusterid.fctr 0 0.0318360231
## D.T.imei 1 0.0308865534
## D.T.display 1 0.0307725689
## D.P.gold 1 0.0304491748
## D.TfIdf.sum.post.stop 0 0.0303366192
## D.T.passcod 1 0.0303239925
## D.T.charger 1 0.0301602937
## D.T.cabl 1 0.0296202577
## D.T.pic 1 0.0292106355
## D.T.blemish 1 0.0291642840
## D.T.shape 1 0.0288373334
## D.T.appl 1 0.0287074081
## D.T.back 1 0.0272103292
## D.T.pictur 1 0.0271676520
## D.T.excel 1 0.0265819659
## D.T.dent 1 0.0264976918
## D.T.digit 1 0.0261375762
## D.npnct03.log 0 0.0257637868
## D.T.bodi 1 0.0252978602
## D.T.qualiti 1 0.0252899986
## D.T.super 1 0.0250040676
## D.T.els 1 0.0250040676
## D.T.disclaim 1 0.0250040676
## D.T.essenti 1 0.0250040676
## D.T.repeat. 1 0.0250040676
## D.npnct07.log 0 0.0250040676
## D.T.light 1 0.0249083615
## D.T.fulli 1 0.0243976808
## D.npnct10.log 0 0.0241015016
## D.T.screen 1 0.0232373651
## D.T.unit 1 0.0231226779
## D.T.read 1 0.0226911615
## D.T.may 1 0.0225762388
## D.T.contact 1 0.0218134520
## D.T.money 1 0.0215250231
## D.T.higher 1 0.0215250231
## D.T.beetl 1 0.0215250231
## D.T.defens 1 0.0215250231
## D.T.final 1 0.0215250231
## D.npnct18.log 0 0.0215250231
## D.T.kept 1 0.0207141990
## D.T.tear 1 0.0205481770
## D.T.open 1 0.0193952625
## D.npnct11.log 0 0.0192035548
## D.T.order 1 0.0188854872
## D.P.white 1 0.0184898845
## D.T.pleas 1 0.0178751143
## D.T.ipad 1 0.0177348755
## D.terms.n.stem.stop.Ratio 0 0.0175790908
## D.T.lock 1 0.0173034228
## D.T.activ 1 0.0166891768
## D.T.anoth 1 0.0164535903
## D.T.damag 1 0.0160662651
## D.T.ding 1 0.0155885916
## D.T.full 1 0.0149589509
## D.T.work 1 0.0145594907
## D.T.detail 1 0.0139118798
## D.T.item 1 0.0134922662
## D.T.stylus 1 0.0125154705
## D.T.packag 1 0.0124597147
## storage.fctr 0 0.0116754969
## D.T.edg 1 0.0114844118
## D.T.must 1 0.0113915486
## D.P.mini 1 0.0112418293
## D.T.photo 1 0.0110333858
## D.T.problem 1 0.0107375772
## D.T.seal 1 0.0106898740
## D.T.come 1 0.0104488093
## D.T.corner 1 0.0104287544
## D.T.brand 1 0.0103861855
## D.T.use 1 0.0103720246
## D.T.scuff 1 0.0101340501
## D.T.lightn 1 0.0099034064
## D.T.speaker 1 0.0096402551
## D.ratio.sum.TfIdf.nwrds 0 0.0096247411
## D.P.air 1 0.0092629952
## D.T.includ 1 0.0091767476
## D.T.side 1 0.0089049983
## D.T.mark 1 0.0088438689
## D.T.scratch 1 0.0088060862
## D.T.icloud 1 0.0086539687
## D.T.keyboard 1 0.0082735718
## D.T.right 1 0.0080547459
## D.T.upper 1 0.0078374765
## D.T.manufactur 1 0.0077942218
## D.T.mini 1 0.0075528886
## D.T.sinc 1 0.0072330260
## D.T.great 1 0.0070063865
## D.T.chip 1 0.0067464224
## D.T.geek 1 0.0064074827
## D.T.squad 1 0.0064074827
## D.T.protector 1 0.0057850197
## D.T.case 1 0.0057562564
## D.T.wear 1 0.0048789708
## D.T.sticker 1 0.0042625126
## D.T.retail 1 0.0042217335
## D.T.see 1 0.0041433566
## D.npnct01.log 0 0.0041255300
## D.P.spacegray 1 0.0034818565
## D.T.air 1 0.0029579942
## D.T.clean 1 0.0025337892
## D.T.normal 1 0.0019081337
## D.TfIdf.sum.stem.stop.Ratio 0 0.0014568383
## .rnorm 0 0.0014350110
## D.P.black 1 0.0012485463
## D.T.small 1 0.0010738542
## D.T.accessori 1 0.0007963083
## D.T.X2016 1 0.0005289068
## D.T.good 1 0.0004368629
## D.T.show 1 0.0003156554
## D.T.expect 1 NA
## D.T.intro 1 NA
## D.npnct02.log 0 NA
## D.npnct04.log 0 NA
## D.npnct17.log 0 NA
## D.npnct19.log 0 NA
## D.npnct20.log 0 NA
## D.npnct21.log 0 NA
## D.npnct22.log 0 NA
## D.npnct23.log 0 NA
## D.npnct25.log 0 NA
## D.npnct26.log 0 NA
## D.npnct27.log 0 NA
## D.npnct29.log 0 NA
## D.npnct30.log 0 NA
## D.P.http 1 NA
# sav_feats_df <- glb_feats_df; glb_feats_df <- sav_feats_df
# Pass the feature table through the project helper myfind_cor_features,
# sort the result by cor.y in descending order, store it back in
# glb_feats_df, then display it.
# The helper call stays nested inside orderBy() so glb_feats_df is only
# overwritten once both steps have completed.
glb_feats_df <- orderBy(
    ~ -cor.y,
    myfind_cor_features(
        feats_df = glb_feats_df,
        obs_df = glb_trnobs_df,
        rsp_var = glb_rsp_var
    )
)
print(glb_feats_df)
## [1] "cor(D.TfIdf.sum.post.stem, D.sum.TfIdf)=1.0000"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.sum.TfIdf)=-0.0324"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.sum.TfIdf as highly correlated with
## D.TfIdf.sum.post.stem
## [1] "cor(D.nwrds.unq.log, D.terms.n.post.stem.log)=1.0000"
## [1] "cor(sold.fctr, D.nwrds.unq.log)=-0.0638"
## [1] "cor(sold.fctr, D.terms.n.post.stem.log)=-0.0638"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stem.log as highly correlated
## with D.nwrds.unq.log
## [1] "cor(D.nwrds.unq.log, D.terms.n.post.stop.log)=0.9999"
## [1] "cor(sold.fctr, D.nwrds.unq.log)=-0.0638"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nwrds.unq.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.nchrs.log, D.nuppr.log)=0.9995"
## [1] "cor(sold.fctr, D.nchrs.log)=-0.0565"
## [1] "cor(sold.fctr, D.nuppr.log)=-0.0553"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nuppr.log as highly correlated with
## D.nchrs.log
## [1] "cor(D.terms.n.post.stem, D.terms.n.post.stop)=0.9991"
## [1] "cor(sold.fctr, D.terms.n.post.stem)=-0.0799"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stem as highly correlated with
## D.terms.n.post.stop
## [1] "cor(D.TfIdf.sum.post.stem, D.TfIdf.sum.post.stop)=0.9981"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stop)=-0.0303"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.TfIdf.sum.post.stop as highly correlated with
## D.TfIdf.sum.post.stem
## [1] "cor(D.nchrs.log, D.terms.n.post.stop.log)=0.9932"
## [1] "cor(sold.fctr, D.nchrs.log)=-0.0565"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nchrs.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.nwrds.log, D.terms.n.post.stop.log)=0.9932"
## [1] "cor(sold.fctr, D.nwrds.log)=-0.0588"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nwrds.log as highly correlated with
## D.terms.n.post.stop.log
## [1] "cor(D.terms.n.post.stop, D.terms.n.post.stop.log)=0.9755"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## [1] "cor(sold.fctr, D.terms.n.post.stop.log)=-0.0639"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.terms.n.post.stop.log as highly correlated
## with D.terms.n.post.stop
## [1] "cor(D.npnct24.log, D.ratio.nstopwrds.nwrds)=-0.9654"
## [1] "cor(sold.fctr, D.npnct24.log)=-0.0458"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct24.log as highly correlated with
## D.ratio.nstopwrds.nwrds
## [1] "cor(D.npnct06.log, D.npnct16.log)=0.9556"
## [1] "cor(sold.fctr, D.npnct06.log)=-0.0500"
## [1] "cor(sold.fctr, D.npnct16.log)=-0.0449"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct16.log as highly correlated with
## D.npnct06.log
## [1] "cor(D.TfIdf.sum.post.stem, D.ratio.nstopwrds.nwrds)=-0.9291"
## [1] "cor(sold.fctr, D.TfIdf.sum.post.stem)=-0.0324"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.TfIdf.sum.post.stem as highly correlated with
## D.ratio.nstopwrds.nwrds
## [1] "cor(D.nstopwrds.log, D.terms.n.post.stop)=0.8888"
## [1] "cor(sold.fctr, D.nstopwrds.log)=-0.0475"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.nstopwrds.log as highly correlated with
## D.terms.n.post.stop
## [1] "cor(D.ratio.nstopwrds.nwrds, D.terms.n.post.stop)=-0.8707"
## [1] "cor(sold.fctr, D.ratio.nstopwrds.nwrds)=0.0538"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.ratio.nstopwrds.nwrds as highly correlated
## with D.terms.n.post.stop
## [1] "cor(D.npnct13.log, D.terms.n.post.stop)=0.7383"
## [1] "cor(sold.fctr, D.npnct13.log)=-0.0373"
## [1] "cor(sold.fctr, D.terms.n.post.stop)=-0.0801"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified D.npnct13.log as highly correlated with
## D.terms.n.post.stop
## [1] "cor(carrier.fctr, cellular.fctr)=0.7131"
## [1] "cor(sold.fctr, carrier.fctr)=-0.0599"
## [1] "cor(sold.fctr, cellular.fctr)=-0.0743"
## Warning in myfind_cor_features(feats_df = glb_feats_df, obs_df =
## glb_trnobs_df, : Identified carrier.fctr as highly correlated with
## cellular.fctr
## id cor.y exclude.as.feat cor.y.abs
## 219 sold 1.0000000000 1 1.0000000000
## 211 biddable 0.5481788380 0 0.5481788380
## 17 D.T.alway 0.0593444093 1 0.0593444093
## 48 D.T.descript 0.0553987246 1 0.0553987246
## 202 D.ratio.nstopwrds.nwrds 0.0537832223 0 0.0537832223
## 86 D.T.left 0.0525031466 1 0.0525031466
## 124 D.T.refer 0.0503000028 1 0.0503000028
## 85 D.T.least 0.0500485566 1 0.0500485566
## 71 D.T.handset 0.0486468119 1 0.0486468119
## 182 D.npnct15.log 0.0484022793 0 0.0484022793
## 118 D.T.previous 0.0453194378 1 0.0453194378
## 155 D.T.two 0.0447161329 1 0.0447161329
## 154 D.T.top 0.0433671354 1 0.0433671354
## 140 D.T.sign 0.0412800974 1 0.0412800974
## 67 D.T.function. 0.0397438087 1 0.0397438087
## 104 D.T.non 0.0397064496 1 0.0397064496
## 28 D.T.broken 0.0380340254 1 0.0380340254
## 41 D.T.corpor 0.0353706112 1 0.0353706112
## 101 D.T.name 0.0352663148 1 0.0352663148
## 131 D.T.scroll 0.0339028710 1 0.0339028710
## 44 D.T.crack 0.0337091183 1 0.0337091183
## 76 D.T.imag 0.0335537013 1 0.0335537013
## 33 D.T.charger 0.0301602937 1 0.0301602937
## 136 D.T.shape 0.0288373334 1 0.0288373334
## 21 D.T.back 0.0272103292 1 0.0272103292
## 116 D.T.pictur 0.0271676520 1 0.0271676520
## 59 D.T.excel 0.0265819659 1 0.0265819659
## 47 D.T.dent 0.0264976918 1 0.0264976918
## 51 D.T.digit 0.0261375762 1 0.0261375762
## 170 D.npnct03.log 0.0257637868 0 0.0257637868
## 53 D.T.disclaim 0.0250040676 1 0.0250040676
## 56 D.T.els 0.0250040676 1 0.0250040676
## 58 D.T.essenti 0.0250040676 1 0.0250040676
## 126 D.T.repeat. 0.0250040676 1 0.0250040676
## 148 D.T.super 0.0250040676 1 0.0250040676
## 174 D.npnct07.log 0.0250040676 0 0.0250040676
## 66 D.T.fulli 0.0243976808 1 0.0243976808
## 130 D.T.screen 0.0232373651 1 0.0232373651
## 94 D.T.may 0.0225762388 1 0.0225762388
## 38 D.T.contact 0.0218134520 1 0.0218134520
## 83 D.T.kept 0.0207141990 1 0.0207141990
## 151 D.T.tear 0.0205481770 1 0.0205481770
## 10 D.P.white 0.0184898845 1 0.0184898845
## 117 D.T.pleas 0.0178751143 1 0.0178751143
## 209 D.terms.n.stem.stop.Ratio 0.0175790908 0 0.0175790908
## 91 D.T.lock 0.0173034228 1 0.0173034228
## 18 D.T.anoth 0.0164535903 1 0.0164535903
## 52 D.T.ding 0.0155885916 1 0.0155885916
## 65 D.T.full 0.0149589509 1 0.0149589509
## 110 D.T.packag 0.0124597147 1 0.0124597147
## 55 D.T.edg 0.0114844118 1 0.0114844118
## 100 D.T.must 0.0113915486 1 0.0113915486
## 114 D.T.photo 0.0110333858 1 0.0110333858
## 119 D.T.problem 0.0107375772 1 0.0107375772
## 133 D.T.seal 0.0106898740 1 0.0106898740
## 158 D.T.use 0.0103720246 1 0.0103720246
## 132 D.T.scuff 0.0101340501 1 0.0101340501
## 143 D.T.speaker 0.0096402551 1 0.0096402551
## 203 D.ratio.sum.TfIdf.nwrds 0.0096247411 0 0.0096247411
## 139 D.T.side 0.0089049983 1 0.0089049983
## 75 D.T.icloud 0.0086539687 1 0.0086539687
## 84 D.T.keyboard 0.0082735718 1 0.0082735718
## 157 D.T.upper 0.0078374765 1 0.0078374765
## 92 D.T.manufactur 0.0077942218 1 0.0077942218
## 141 D.T.sinc 0.0072330260 1 0.0072330260
## 70 D.T.great 0.0070063865 1 0.0070063865
## 121 D.T.protector 0.0057850197 1 0.0057850197
## 31 D.T.case 0.0057562564 1 0.0057562564
## 145 D.T.sticker 0.0042625126 1 0.0042625126
## 134 D.T.see 0.0041433566 1 0.0041433566
## 168 D.npnct01.log 0.0041255300 0 0.0041255300
## 9 D.P.spacegray 0.0034818565 1 0.0034818565
## 35 D.T.clean 0.0025337892 1 0.0025337892
## 105 D.T.normal 0.0019081337 1 0.0019081337
## 13 D.T.accessori 0.0007963083 1 0.0007963083
## 138 D.T.show -0.0003156554 1 0.0003156554
## 69 D.T.good -0.0004368629 1 0.0004368629
## 12 D.T.X2016 -0.0005289068 1 0.0005289068
## 142 D.T.small -0.0010738542 1 0.0010738542
## 5 D.P.black -0.0012485463 1 0.0012485463
## 3 .rnorm -0.0014350110 0 0.0014350110
## 165 D.TfIdf.sum.stem.stop.Ratio -0.0014568383 0 0.0014568383
## 16 D.T.air -0.0029579942 1 0.0029579942
## 127 D.T.retail -0.0042217335 1 0.0042217335
## 159 D.T.wear -0.0048789708 1 0.0048789708
## 68 D.T.geek -0.0064074827 1 0.0064074827
## 144 D.T.squad -0.0064074827 1 0.0064074827
## 34 D.T.chip -0.0067464224 1 0.0067464224
## 95 D.T.mini -0.0075528886 1 0.0075528886
## 128 D.T.right -0.0080547459 1 0.0080547459
## 129 D.T.scratch -0.0088060862 1 0.0088060862
## 93 D.T.mark -0.0088438689 1 0.0088438689
## 78 D.T.includ -0.0091767476 1 0.0091767476
## 4 D.P.air -0.0092629952 1 0.0092629952
## 88 D.T.lightn -0.0099034064 1 0.0099034064
## 27 D.T.brand -0.0103861855 1 0.0103861855
## 40 D.T.corner -0.0104287544 1 0.0104287544
## 36 D.T.come -0.0104488093 1 0.0104488093
## 8 D.P.mini -0.0112418293 1 0.0112418293
## 224 storage.fctr -0.0116754969 0 0.0116754969
## 147 D.T.stylus -0.0125154705 1 0.0125154705
## 82 D.T.item -0.0134922662 1 0.0134922662
## 49 D.T.detail -0.0139118798 1 0.0139118798
## 162 D.T.work -0.0145594907 1 0.0145594907
## 45 D.T.damag -0.0160662651 1 0.0160662651
## 14 D.T.activ -0.0166891768 1 0.0166891768
## 81 D.T.ipad -0.0177348755 1 0.0177348755
## 107 D.T.order -0.0188854872 1 0.0188854872
## 178 D.npnct11.log -0.0192035548 0 0.0192035548
## 106 D.T.open -0.0193952625 1 0.0193952625
## 23 D.T.beetl -0.0215250231 1 0.0215250231
## 46 D.T.defens -0.0215250231 1 0.0215250231
## 63 D.T.final -0.0215250231 1 0.0215250231
## 73 D.T.higher -0.0215250231 1 0.0215250231
## 99 D.T.money -0.0215250231 1 0.0215250231
## 185 D.npnct18.log -0.0215250231 0 0.0215250231
## 123 D.T.read -0.0226911615 1 0.0226911615
## 156 D.T.unit -0.0231226779 1 0.0231226779
## 177 D.npnct10.log -0.0241015016 0 0.0241015016
## 87 D.T.light -0.0249083615 1 0.0249083615
## 122 D.T.qualiti -0.0252899986 1 0.0252899986
## 25 D.T.bodi -0.0252978602 1 0.0252978602
## 19 D.T.appl -0.0287074081 1 0.0287074081
## 24 D.T.blemish -0.0291642840 1 0.0291642840
## 115 D.T.pic -0.0292106355 1 0.0292106355
## 30 D.T.cabl -0.0296202577 1 0.0296202577
## 111 D.T.passcod -0.0303239925 1 0.0303239925
## 164 D.TfIdf.sum.post.stop -0.0303366192 0 0.0303366192
## 6 D.P.gold -0.0304491748 1 0.0304491748
## 54 D.T.display -0.0307725689 1 0.0307725689
## 77 D.T.imei -0.0308865534 1 0.0308865534
## 1 .clusterid -0.0318360231 1 0.0318360231
## 2 .clusterid.fctr -0.0318360231 0 0.0318360231
## 163 D.TfIdf.sum.post.stem -0.0323742743 0 0.0323742743
## 204 D.sum.TfIdf -0.0323742743 0 0.0323742743
## 96 D.T.minim -0.0330865270 1 0.0330865270
## 29 D.T.button -0.0338831937 1 0.0338831937
## 161 D.T.will -0.0350451461 1 0.0350451461
## 39 D.T.contain -0.0355839439 1 0.0355839439
## 217 prdl.my.descr.fctr -0.0358461598 0 0.0358461598
## 103 D.T.new -0.0372353149 1 0.0372353149
## 42 D.T.correct -0.0373025158 1 0.0373025158
## 62 D.T.featur -0.0373025158 1 0.0373025158
## 180 D.npnct13.log -0.0373463069 0 0.0373463069
## 32 D.T.certifi -0.0385931627 1 0.0385931627
## 214 color.fctr -0.0391372902 0 0.0391372902
## 72 D.T.heavili -0.0391978700 1 0.0391978700
## 175 D.npnct08.log -0.0396513123 0 0.0396513123
## 218 prdline.my.fctr -0.0415814340 1 0.0415814340
## 37 D.T.condit -0.0418798096 1 0.0418798096
## 149 D.T.sync -0.0430848435 1 0.0430848435
## 152 D.T.technician -0.0430848435 1 0.0430848435
## 125 D.T.refurbish -0.0449149382 1 0.0449149382
## 183 D.npnct16.log -0.0449403962 0 0.0449403962
## 191 D.npnct24.log -0.0458449965 0 0.0458449965
## 198 D.nstopwrds.log -0.0474681704 0 0.0474681704
## 64 D.T.free -0.0478266395 1 0.0478266395
## 137 D.T.ship -0.0483492299 1 0.0483492299
## 97 D.T.minor -0.0483597041 1 0.0483597041
## 160 D.T.wifi -0.0499453504 1 0.0499453504
## 173 D.npnct06.log -0.0499761958 0 0.0499761958
## 50 D.T.devic -0.0504727874 1 0.0504727874
## 112 D.T.perfect -0.0504871511 1 0.0504871511
## 22 D.T.bare -0.0509186819 1 0.0509186819
## 57 D.T.esn -0.0517020813 1 0.0517020813
## 195 D.npnct28.log -0.0524583244 0 0.0524583244
## 108 D.T.origin -0.0525252573 1 0.0525252573
## 113 D.T.phone -0.0527118662 1 0.0527118662
## 199 D.nuppr.log -0.0553358386 0 0.0553358386
## 20 D.T.averag -0.0555976359 1 0.0555976359
## 89 D.T.like -0.0557801451 1 0.0557801451
## 26 D.T.box -0.0563485768 1 0.0563485768
## 166 D.nchrs.log -0.0565357348 0 0.0565357348
## 15 D.T.affect -0.0566799690 1 0.0566799690
## 79 D.T.inspect -0.0568460093 1 0.0568460093
## 135 D.T.seller -0.0584897212 1 0.0584897212
## 153 D.T.test -0.0586208334 1 0.0586208334
## 200 D.nwrds.log -0.0588147403 0 0.0588147403
## 179 D.npnct12.log -0.0593256462 0 0.0593256462
## 212 carrier.fctr -0.0599089237 0 0.0599089237
## 146 D.T.stock -0.0607284075 1 0.0607284075
## 98 D.T.mint -0.0610303678 1 0.0610303678
## 176 D.npnct09.log -0.0618253281 0 0.0618253281
## 109 D.T.overal -0.0621057222 1 0.0621057222
## 167 D.ndgts.log -0.0628684727 0 0.0628684727
## 201 D.nwrds.unq.log -0.0638431167 0 0.0638431167
## 206 D.terms.n.post.stem.log -0.0638431167 0 0.0638431167
## 208 D.terms.n.post.stop.log -0.0638651730 0 0.0638651730
## 150 D.T.tab -0.0707242028 1 0.0707242028
## 120 D.T.profession -0.0712586605 1 0.0712586605
## 213 cellular.fctr -0.0743297381 0 0.0743297381
## 43 D.T.cosmet -0.0777513602 1 0.0777513602
## 181 D.npnct14.log -0.0786203827 0 0.0786203827
## 205 D.terms.n.post.stem -0.0798677390 0 0.0798677390
## 207 D.terms.n.post.stop -0.0800729927 0 0.0800729927
## 61 D.T.fair -0.0802848689 1 0.0802848689
## 90 D.T.list -0.0870905528 1 0.0870905528
## 102 D.T.near -0.0929819941 1 0.0929819941
## 11 D.T.X100 -0.1150127028 1 0.1150127028
## 172 D.npnct05.log -0.1180558939 0 0.1180558939
## 74 D.T.hous -0.1373919817 1 0.1373919817
## 215 condition.fctr -0.1535490071 0 0.1535490071
## 210 UniqueID -0.1895466260 1 0.1895466260
## 216 idseq.my -0.1895466260 0 0.1895466260
## 221 startprice.diff -0.2794223471 0 0.2794223471
## 223 startprice.predict. -0.3573141534 1 0.3573141534
## 220 startprice -0.4569767211 1 0.4569767211
## 222 startprice.log -0.4674275376 1 0.4674275376
## 7 D.P.http NA 1 NA
## 60 D.T.expect NA 1 NA
## 80 D.T.intro NA 1 NA
## 169 D.npnct02.log NA 0 NA
## 171 D.npnct04.log NA 0 NA
## 184 D.npnct17.log NA 0 NA
## 186 D.npnct19.log NA 0 NA
## 187 D.npnct20.log NA 0 NA
## 188 D.npnct21.log NA 0 NA
## 189 D.npnct22.log NA 0 NA
## 190 D.npnct23.log NA 0 NA
## 192 D.npnct25.log NA 0 NA
## 193 D.npnct26.log NA 0 NA
## 194 D.npnct27.log NA 0 NA
## 196 D.npnct29.log NA 0 NA
## 197 D.npnct30.log NA 0 NA
## cor.high.X freqRatio percentUnique zeroVar nzv
## 219 <NA> 1.161628 0.10758472 FALSE FALSE
## 211 <NA> 1.221027 0.10758472 FALSE FALSE
## 17 <NA> 461.000000 0.43033889 FALSE TRUE
## 48 <NA> 203.444444 0.48413125 FALSE TRUE
## 202 D.terms.n.post.stop 13.544304 4.19580420 FALSE FALSE
## 86 <NA> 925.500000 0.37654653 FALSE TRUE
## 124 <NA> 264.142857 0.21516945 FALSE TRUE
## 85 <NA> 463.750000 0.10758472 FALSE TRUE
## 71 <NA> 264.571429 0.10758472 FALSE TRUE
## 182 <NA> 153.416667 0.16137708 FALSE TRUE
## 118 <NA> 263.857143 0.26896181 FALSE TRUE
## 155 <NA> 617.666667 0.21516945 FALSE TRUE
## 154 <NA> 368.200000 0.53792361 FALSE TRUE
## 140 <NA> 112.500000 0.59171598 FALSE TRUE
## 67 <NA> 71.080000 0.59171598 FALSE TRUE
## 104 <NA> 308.500000 0.21516945 FALSE TRUE
## 28 <NA> 263.285714 0.48413125 FALSE TRUE
## 41 <NA> 928.500000 0.10758472 FALSE TRUE
## 101 <NA> 1857.000000 0.16137708 FALSE TRUE
## 131 <NA> 618.333333 0.16137708 FALSE TRUE
## 44 <NA> 258.285714 0.80688542 FALSE TRUE
## 76 <NA> 1857.000000 0.16137708 FALSE TRUE
## 33 <NA> 305.166667 0.69930070 FALSE TRUE
## 136 <NA> 615.000000 0.48413125 FALSE TRUE
## 21 <NA> 138.846154 0.64550834 FALSE TRUE
## 116 <NA> 367.000000 0.48413125 FALSE TRUE
## 59 <NA> 149.666667 0.75309306 FALSE TRUE
## 47 <NA> 165.272727 0.53792361 FALSE TRUE
## 51 <NA> 368.800000 0.48413125 FALSE TRUE
## 170 <NA> 83.227273 0.16137708 FALSE TRUE
## 53 <NA> 1858.000000 0.10758472 FALSE TRUE
## 56 <NA> 1858.000000 0.10758472 FALSE TRUE
## 58 <NA> 1858.000000 0.10758472 FALSE TRUE
## 126 <NA> 1858.000000 0.10758472 FALSE TRUE
## 148 <NA> 1858.000000 0.10758472 FALSE TRUE
## 174 <NA> 1858.000000 0.10758472 FALSE TRUE
## 66 <NA> 99.777778 0.64550834 FALSE TRUE
## 130 <NA> 58.862069 0.80688542 FALSE TRUE
## 94 <NA> 262.571429 0.26896181 FALSE TRUE
## 38 <NA> 230.875000 0.26896181 FALSE TRUE
## 83 <NA> 307.000000 0.53792361 FALSE TRUE
## 151 <NA> 262.571429 0.43033889 FALSE TRUE
## 10 <NA> 231.250000 0.16137708 FALSE TRUE
## 117 <NA> 181.100000 0.59171598 FALSE TRUE
## 209 <NA> 71.600000 0.43033889 FALSE TRUE
## 91 <NA> 366.200000 0.69930070 FALSE TRUE
## 18 <NA> 618.666667 0.10758472 FALSE TRUE
## 52 <NA> 307.166667 0.43033889 FALSE TRUE
## 65 <NA> 616.000000 0.37654653 FALSE TRUE
## 110 <NA> 306.666667 0.37654653 FALSE TRUE
## 55 <NA> 617.666667 0.21516945 FALSE TRUE
## 100 <NA> 618.000000 0.21516945 FALSE TRUE
## 114 <NA> 614.000000 0.59171598 FALSE TRUE
## 119 <NA> 463.250000 0.21516945 FALSE TRUE
## 133 <NA> 927.000000 0.26896181 FALSE TRUE
## 158 <NA> 51.656250 0.96826251 FALSE TRUE
## 132 <NA> 228.250000 0.53792361 FALSE TRUE
## 143 <NA> 928.000000 0.16137708 FALSE TRUE
## 203 <NA> 63.000000 34.85745024 FALSE FALSE
## 139 <NA> 927.500000 0.21516945 FALSE TRUE
## 75 <NA> 182.400000 0.59171598 FALSE TRUE
## 84 <NA> 1854.000000 0.32275417 FALSE TRUE
## 157 <NA> 1857.000000 0.16137708 FALSE TRUE
## 92 <NA> 463.000000 0.26896181 FALSE TRUE
## 141 <NA> 615.666667 0.37654653 FALSE TRUE
## 70 <NA> 98.666667 0.80688542 FALSE TRUE
## 121 <NA> 461.000000 0.37654653 FALSE TRUE
## 31 <NA> 105.352941 0.69930070 FALSE TRUE
## 145 <NA> 463.250000 0.16137708 FALSE TRUE
## 134 <NA> 260.571429 0.69930070 FALSE TRUE
## 168 <NA> 52.970588 0.32275417 FALSE TRUE
## 9 <NA> 463.750000 0.10758472 FALSE TRUE
## 35 <NA> 203.222222 0.64550834 FALSE TRUE
## 105 <NA> 305.500000 0.48413125 FALSE TRUE
## 13 <NA> 229.875000 0.43033889 FALSE TRUE
## 138 <NA> 113.937500 0.32275417 FALSE TRUE
## 69 <NA> 49.200000 0.86067778 FALSE TRUE
## 12 <NA> 1857.000000 0.16137708 FALSE TRUE
## 142 <NA> 261.428571 0.48413125 FALSE TRUE
## 5 <NA> 168.000000 0.10758472 FALSE TRUE
## 3 <NA> 1.000000 100.00000000 FALSE FALSE
## 165 <NA> 65.176471 32.86713287 FALSE FALSE
## 16 <NA> 460.750000 0.43033889 FALSE TRUE
## 127 <NA> 463.000000 0.26896181 FALSE TRUE
## 159 <NA> 99.500000 0.53792361 FALSE TRUE
## 68 <NA> 1856.000000 0.21516945 FALSE TRUE
## 144 <NA> 1856.000000 0.21516945 FALSE TRUE
## 34 <NA> 1855.000000 0.26896181 FALSE TRUE
## 95 <NA> 459.500000 0.59171598 FALSE TRUE
## 128 <NA> 616.000000 0.48413125 FALSE TRUE
## 129 <NA> 43.578947 0.86067778 FALSE TRUE
## 93 <NA> 167.181818 0.37654653 FALSE TRUE
## 78 <NA> 105.882353 0.69930070 FALSE TRUE
## 4 <NA> 122.866667 0.16137708 FALSE TRUE
## 88 <NA> 1856.000000 0.21516945 FALSE TRUE
## 27 <NA> 459.000000 0.69930070 FALSE TRUE
## 40 <NA> 305.500000 0.43033889 FALSE TRUE
## 36 <NA> 139.846154 0.64550834 FALSE TRUE
## 8 <NA> 91.900000 0.16137708 FALSE TRUE
## 224 <NA> 2.725146 0.26896181 FALSE FALSE
## 147 <NA> 1857.000000 0.16137708 FALSE TRUE
## 82 <NA> 88.850000 0.64550834 FALSE TRUE
## 49 <NA> 1856.000000 0.21516945 FALSE TRUE
## 162 <NA> 71.583333 0.69930070 FALSE TRUE
## 45 <NA> 460.000000 0.48413125 FALSE TRUE
## 14 <NA> 927.500000 0.21516945 FALSE TRUE
## 81 <NA> 49.823529 0.96826251 FALSE TRUE
## 107 <NA> 461.250000 0.37654653 FALSE TRUE
## 178 <NA> 9.374269 0.37654653 FALSE FALSE
## 106 <NA> 261.142857 0.53792361 FALSE TRUE
## 23 <NA> 1858.000000 0.10758472 FALSE TRUE
## 46 <NA> 1858.000000 0.10758472 FALSE TRUE
## 63 <NA> 1858.000000 0.10758472 FALSE TRUE
## 73 <NA> 1858.000000 0.10758472 FALSE TRUE
## 99 <NA> 1858.000000 0.10758472 FALSE TRUE
## 185 <NA> 1858.000000 0.10758472 FALSE TRUE
## 123 <NA> 459.500000 0.53792361 FALSE TRUE
## 156 <NA> 203.555556 0.48413125 FALSE TRUE
## 177 <NA> 308.666667 0.16137708 FALSE TRUE
## 87 <NA> 94.526316 0.64550834 FALSE TRUE
## 122 <NA> 463.500000 0.16137708 FALSE TRUE
## 25 <NA> 925.500000 0.32275417 FALSE TRUE
## 19 <NA> 226.750000 0.64550834 FALSE TRUE
## 24 <NA> 107.941176 0.37654653 FALSE TRUE
## 115 <NA> 1857.000000 0.16137708 FALSE TRUE
## 30 <NA> 230.625000 0.32275417 FALSE TRUE
## 111 <NA> 1857.000000 0.16137708 FALSE TRUE
## 164 D.TfIdf.sum.post.stem 63.000000 34.26573427 FALSE FALSE
## 6 <NA> 928.500000 0.10758472 FALSE TRUE
## 54 <NA> 184.300000 0.43033889 FALSE TRUE
## 77 <NA> 369.400000 0.32275417 FALSE TRUE
## 1 <NA> 6.886598 0.37654653 FALSE FALSE
## 2 <NA> 6.886598 0.37654653 FALSE FALSE
## 163 D.ratio.nstopwrds.nwrds 63.000000 34.26573427 FALSE FALSE
## 204 D.TfIdf.sum.post.stem 63.000000 34.26573427 FALSE FALSE
## 96 <NA> 166.909091 0.43033889 FALSE TRUE
## 29 <NA> 369.400000 0.32275417 FALSE TRUE
## 161 <NA> 367.000000 0.53792361 FALSE TRUE
## 39 <NA> 928.000000 0.16137708 FALSE TRUE
## 217 <NA> 1.271676 0.75309306 FALSE FALSE
## 103 <NA> 109.437500 0.86067778 FALSE TRUE
## 42 <NA> 618.666667 0.10758472 FALSE TRUE
## 62 <NA> 618.666667 0.10758472 FALSE TRUE
## 180 D.terms.n.post.stop 5.203065 0.32275417 FALSE FALSE
## 32 <NA> 370.400000 0.21516945 FALSE TRUE
## 214 <NA> 1.544053 0.26896181 FALSE FALSE
## 72 <NA> 308.666667 0.16137708 FALSE TRUE
## 175 <NA> 69.576923 0.21516945 FALSE TRUE
## 218 <NA> 1.135048 0.37654653 FALSE FALSE
## 37 <NA> 25.233333 0.96826251 FALSE TRUE
## 149 <NA> 463.750000 0.10758472 FALSE TRUE
## 152 <NA> 463.750000 0.10758472 FALSE TRUE
## 125 <NA> 183.400000 0.43033889 FALSE TRUE
## 183 D.npnct06.log 31.245614 0.16137708 FALSE TRUE
## 191 D.ratio.nstopwrds.nwrds 1.356147 0.10758472 FALSE FALSE
## 198 D.terms.n.post.stop 13.000000 0.80688542 FALSE FALSE
## 64 <NA> 263.142857 0.37654653 FALSE TRUE
## 137 <NA> 308.333333 0.21516945 FALSE TRUE
## 97 <NA> 84.142857 0.59171598 FALSE TRUE
## 160 <NA> 368.400000 0.43033889 FALSE TRUE
## 173 <NA> 33.735849 0.16137708 FALSE TRUE
## 50 <NA> 94.789474 0.64550834 FALSE TRUE
## 112 <NA> 226.625000 0.64550834 FALSE TRUE
## 22 <NA> 615.333333 0.43033889 FALSE TRUE
## 57 <NA> 461.500000 0.37654653 FALSE TRUE
## 195 <NA> 463.250000 0.16137708 FALSE TRUE
## 108 <NA> 227.875000 0.69930070 FALSE TRUE
## 113 <NA> 463.250000 0.16137708 FALSE TRUE
## 199 D.nchrs.log 18.807018 4.41097364 FALSE FALSE
## 20 <NA> 617.333333 0.26896181 FALSE TRUE
## 89 <NA> 258.571429 0.80688542 FALSE TRUE
## 26 <NA> 89.300000 0.80688542 FALSE TRUE
## 166 D.terms.n.post.stop.log 15.970149 5.70199032 FALSE FALSE
## 15 <NA> 131.428571 0.21516945 FALSE TRUE
## 79 <NA> 617.333333 0.21516945 FALSE TRUE
## 135 <NA> 369.600000 0.21516945 FALSE TRUE
## 153 <NA> 261.571429 0.43033889 FALSE TRUE
## 200 D.terms.n.post.stop.log 12.738095 1.29101668 FALSE FALSE
## 179 <NA> 27.246154 0.21516945 FALSE TRUE
## 212 cellular.fctr 3.220290 0.37654653 FALSE FALSE
## 146 <NA> 462.750000 0.21516945 FALSE TRUE
## 98 <NA> 78.782609 0.75309306 FALSE TRUE
## 176 <NA> 308.333333 0.21516945 FALSE TRUE
## 109 <NA> 263.857143 0.37654653 FALSE TRUE
## 167 <NA> 27.047619 0.69930070 FALSE TRUE
## 201 D.terms.n.post.stop.log 8.568000 0.80688542 FALSE FALSE
## 206 D.nwrds.unq.log 8.568000 0.80688542 FALSE FALSE
## 208 D.terms.n.post.stop 9.232759 0.80688542 FALSE FALSE
## 150 <NA> 204.666667 0.26896181 FALSE TRUE
## 120 <NA> 308.000000 0.21516945 FALSE TRUE
## 213 <NA> 2.116190 0.16137708 FALSE FALSE
## 43 <NA> 49.361111 0.48413125 FALSE TRUE
## 181 <NA> 35.333333 0.26896181 FALSE TRUE
## 205 D.terms.n.post.stop 8.568000 0.80688542 FALSE FALSE
## 207 <NA> 9.232759 0.80688542 FALSE FALSE
## 61 <NA> 369.000000 0.26896181 FALSE TRUE
## 90 <NA> 107.588235 0.26896181 FALSE TRUE
## 102 <NA> 91.600000 0.37654653 FALSE TRUE
## 11 <NA> 86.380952 0.48413125 FALSE TRUE
## 172 <NA> 40.311111 0.10758472 FALSE TRUE
## 74 <NA> 100.333333 0.43033889 FALSE TRUE
## 215 <NA> 4.003460 0.32275417 FALSE FALSE
## 210 <NA> 1.000000 100.00000000 FALSE FALSE
## 216 <NA> 1.000000 100.00000000 FALSE FALSE
## 221 <NA> 1.000000 100.00000000 FALSE FALSE
## 223 <NA> 1.000000 100.00000000 FALSE FALSE
## 220 <NA> 2.807692 30.17751479 FALSE FALSE
## 222 <NA> 2.807692 30.17751479 FALSE FALSE
## 7 <NA> 0.000000 0.05379236 TRUE TRUE
## 60 <NA> 0.000000 0.05379236 TRUE TRUE
## 80 <NA> 0.000000 0.05379236 TRUE TRUE
## 169 <NA> 0.000000 0.05379236 TRUE TRUE
## 171 <NA> 0.000000 0.05379236 TRUE TRUE
## 184 <NA> 0.000000 0.05379236 TRUE TRUE
## 186 <NA> 0.000000 0.05379236 TRUE TRUE
## 187 <NA> 0.000000 0.05379236 TRUE TRUE
## 188 <NA> 0.000000 0.05379236 TRUE TRUE
## 189 <NA> 0.000000 0.05379236 TRUE TRUE
## 190 <NA> 0.000000 0.05379236 TRUE TRUE
## 192 <NA> 0.000000 0.05379236 TRUE TRUE
## 193 <NA> 0.000000 0.05379236 TRUE TRUE
## 194 <NA> 0.000000 0.05379236 TRUE TRUE
## 196 <NA> 0.000000 0.05379236 TRUE TRUE
## 197 <NA> 0.000000 0.05379236 TRUE TRUE
## myNearZV is.cor.y.abs.low
## 219 FALSE FALSE
## 211 FALSE FALSE
## 17 FALSE FALSE
## 48 FALSE FALSE
## 202 FALSE FALSE
## 86 TRUE FALSE
## 124 FALSE FALSE
## 85 FALSE FALSE
## 71 FALSE FALSE
## 182 FALSE FALSE
## 118 FALSE FALSE
## 155 TRUE FALSE
## 154 FALSE FALSE
## 140 FALSE FALSE
## 67 FALSE FALSE
## 104 FALSE FALSE
## 28 FALSE FALSE
## 41 TRUE FALSE
## 101 TRUE FALSE
## 131 TRUE FALSE
## 44 FALSE FALSE
## 76 TRUE FALSE
## 33 FALSE FALSE
## 136 TRUE FALSE
## 21 FALSE FALSE
## 116 FALSE FALSE
## 59 FALSE FALSE
## 47 FALSE FALSE
## 51 FALSE FALSE
## 170 FALSE FALSE
## 53 TRUE FALSE
## 56 TRUE FALSE
## 58 TRUE FALSE
## 126 TRUE FALSE
## 148 TRUE FALSE
## 174 TRUE FALSE
## 66 FALSE FALSE
## 130 FALSE FALSE
## 94 FALSE FALSE
## 38 FALSE FALSE
## 83 FALSE FALSE
## 151 FALSE FALSE
## 10 FALSE FALSE
## 117 FALSE FALSE
## 209 FALSE FALSE
## 91 FALSE FALSE
## 18 TRUE FALSE
## 52 FALSE FALSE
## 65 TRUE FALSE
## 110 FALSE FALSE
## 55 TRUE FALSE
## 100 TRUE FALSE
## 114 TRUE FALSE
## 119 FALSE FALSE
## 133 TRUE FALSE
## 158 FALSE FALSE
## 132 FALSE FALSE
## 143 TRUE FALSE
## 203 FALSE FALSE
## 139 TRUE FALSE
## 75 FALSE FALSE
## 84 TRUE FALSE
## 157 TRUE FALSE
## 92 FALSE FALSE
## 141 TRUE FALSE
## 70 FALSE FALSE
## 121 FALSE FALSE
## 31 FALSE FALSE
## 145 FALSE FALSE
## 134 FALSE FALSE
## 168 FALSE FALSE
## 9 FALSE FALSE
## 35 FALSE FALSE
## 105 FALSE FALSE
## 13 FALSE TRUE
## 138 FALSE TRUE
## 69 FALSE TRUE
## 12 TRUE TRUE
## 142 FALSE TRUE
## 5 FALSE TRUE
## 3 FALSE FALSE
## 165 FALSE FALSE
## 16 FALSE FALSE
## 127 FALSE FALSE
## 159 FALSE FALSE
## 68 TRUE FALSE
## 144 TRUE FALSE
## 34 TRUE FALSE
## 95 FALSE FALSE
## 128 TRUE FALSE
## 129 FALSE FALSE
## 93 FALSE FALSE
## 78 FALSE FALSE
## 4 FALSE FALSE
## 88 TRUE FALSE
## 27 FALSE FALSE
## 40 FALSE FALSE
## 36 FALSE FALSE
## 8 FALSE FALSE
## 224 FALSE FALSE
## 147 TRUE FALSE
## 82 FALSE FALSE
## 49 TRUE FALSE
## 162 FALSE FALSE
## 45 FALSE FALSE
## 14 TRUE FALSE
## 81 FALSE FALSE
## 107 FALSE FALSE
## 178 FALSE FALSE
## 106 FALSE FALSE
## 23 TRUE FALSE
## 46 TRUE FALSE
## 63 TRUE FALSE
## 73 TRUE FALSE
## 99 TRUE FALSE
## 185 TRUE FALSE
## 123 FALSE FALSE
## 156 FALSE FALSE
## 177 FALSE FALSE
## 87 FALSE FALSE
## 122 FALSE FALSE
## 25 TRUE FALSE
## 19 FALSE FALSE
## 24 FALSE FALSE
## 115 TRUE FALSE
## 30 FALSE FALSE
## 111 TRUE FALSE
## 164 FALSE FALSE
## 6 TRUE FALSE
## 54 FALSE FALSE
## 77 FALSE FALSE
## 1 FALSE FALSE
## 2 FALSE FALSE
## 163 FALSE FALSE
## 204 FALSE FALSE
## 96 FALSE FALSE
## 29 FALSE FALSE
## 161 FALSE FALSE
## 39 TRUE FALSE
## 217 FALSE FALSE
## 103 FALSE FALSE
## 42 TRUE FALSE
## 62 TRUE FALSE
## 180 FALSE FALSE
## 32 FALSE FALSE
## 214 FALSE FALSE
## 72 FALSE FALSE
## 175 FALSE FALSE
## 218 FALSE FALSE
## 37 FALSE FALSE
## 149 FALSE FALSE
## 152 FALSE FALSE
## 125 FALSE FALSE
## 183 FALSE FALSE
## 191 FALSE FALSE
## 198 FALSE FALSE
## 64 FALSE FALSE
## 137 FALSE FALSE
## 97 FALSE FALSE
## 160 FALSE FALSE
## 173 FALSE FALSE
## 50 FALSE FALSE
## 112 FALSE FALSE
## 22 TRUE FALSE
## 57 FALSE FALSE
## 195 FALSE FALSE
## 108 FALSE FALSE
## 113 FALSE FALSE
## 199 FALSE FALSE
## 20 TRUE FALSE
## 89 FALSE FALSE
## 26 FALSE FALSE
## 166 FALSE FALSE
## 15 FALSE FALSE
## 79 TRUE FALSE
## 135 FALSE FALSE
## 153 FALSE FALSE
## 200 FALSE FALSE
## 179 FALSE FALSE
## 212 FALSE FALSE
## 146 FALSE FALSE
## 98 FALSE FALSE
## 176 FALSE FALSE
## 109 FALSE FALSE
## 167 FALSE FALSE
## 201 FALSE FALSE
## 206 FALSE FALSE
## 208 FALSE FALSE
## 150 FALSE FALSE
## 120 FALSE FALSE
## 213 FALSE FALSE
## 43 FALSE FALSE
## 181 FALSE FALSE
## 205 FALSE FALSE
## 207 FALSE FALSE
## 61 FALSE FALSE
## 90 FALSE FALSE
## 102 FALSE FALSE
## 11 FALSE FALSE
## 172 FALSE FALSE
## 74 FALSE FALSE
## 215 FALSE FALSE
## 210 FALSE FALSE
## 216 FALSE FALSE
## 221 FALSE FALSE
## 223 FALSE FALSE
## 220 FALSE FALSE
## 222 FALSE FALSE
## 7 TRUE NA
## 60 TRUE NA
## 80 TRUE NA
## 169 TRUE NA
## 171 TRUE NA
## 184 TRUE NA
## 186 TRUE NA
## 187 TRUE NA
## 188 TRUE NA
## 189 TRUE NA
## 190 TRUE NA
## 192 TRUE NA
## 193 TRUE NA
## 194 TRUE NA
## 196 TRUE NA
## 197 TRUE NA
#subset(glb_feats_df, id %in% c("A.nuppr.log", "S.nuppr.log"))
# Scatter plot of percentUnique (x) vs. freqRatio (y) over the rows of
# glb_feats_df, colored by the myNearZV flag and with point shape taken from
# the nzv flag.
# xlim(-5, 25) clips the x axis: rows whose percentUnique lies outside that
# range are not drawn, which is what the "Removed 12 rows" warnings below
# report (12 features in the table above have percentUnique > 25).
print(myplot_scatter(glb_feats_df, "percentUnique", "freqRatio",
colorcol_name="myNearZV", jitter=TRUE) +
geom_point(aes(shape=nzv)) + xlim(-5, 25))
## Warning in myplot_scatter(glb_feats_df, "percentUnique", "freqRatio",
## colorcol_name = "myNearZV", : converting myNearZV to class:factor
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 12 rows containing missing values (geom_point).
# List the feature rows flagged myNearZV (subset() silently drops rows where
# the flag is NA).
print(subset(glb_feats_df, myNearZV))
## id cor.y exclude.as.feat cor.y.abs cor.high.X
## 86 D.T.left 0.0525031466 1 0.0525031466 <NA>
## 155 D.T.two 0.0447161329 1 0.0447161329 <NA>
## 41 D.T.corpor 0.0353706112 1 0.0353706112 <NA>
## 101 D.T.name 0.0352663148 1 0.0352663148 <NA>
## 131 D.T.scroll 0.0339028710 1 0.0339028710 <NA>
## 76 D.T.imag 0.0335537013 1 0.0335537013 <NA>
## 136 D.T.shape 0.0288373334 1 0.0288373334 <NA>
## 53 D.T.disclaim 0.0250040676 1 0.0250040676 <NA>
## 56 D.T.els 0.0250040676 1 0.0250040676 <NA>
## 58 D.T.essenti 0.0250040676 1 0.0250040676 <NA>
## 126 D.T.repeat. 0.0250040676 1 0.0250040676 <NA>
## 148 D.T.super 0.0250040676 1 0.0250040676 <NA>
## 174 D.npnct07.log 0.0250040676 0 0.0250040676 <NA>
## 18 D.T.anoth 0.0164535903 1 0.0164535903 <NA>
## 65 D.T.full 0.0149589509 1 0.0149589509 <NA>
## 55 D.T.edg 0.0114844118 1 0.0114844118 <NA>
## 100 D.T.must 0.0113915486 1 0.0113915486 <NA>
## 114 D.T.photo 0.0110333858 1 0.0110333858 <NA>
## 133 D.T.seal 0.0106898740 1 0.0106898740 <NA>
## 143 D.T.speaker 0.0096402551 1 0.0096402551 <NA>
## 139 D.T.side 0.0089049983 1 0.0089049983 <NA>
## 84 D.T.keyboard 0.0082735718 1 0.0082735718 <NA>
## 157 D.T.upper 0.0078374765 1 0.0078374765 <NA>
## 141 D.T.sinc 0.0072330260 1 0.0072330260 <NA>
## 12 D.T.X2016 -0.0005289068 1 0.0005289068 <NA>
## 68 D.T.geek -0.0064074827 1 0.0064074827 <NA>
## 144 D.T.squad -0.0064074827 1 0.0064074827 <NA>
## 34 D.T.chip -0.0067464224 1 0.0067464224 <NA>
## 128 D.T.right -0.0080547459 1 0.0080547459 <NA>
## 88 D.T.lightn -0.0099034064 1 0.0099034064 <NA>
## 147 D.T.stylus -0.0125154705 1 0.0125154705 <NA>
## 49 D.T.detail -0.0139118798 1 0.0139118798 <NA>
## 14 D.T.activ -0.0166891768 1 0.0166891768 <NA>
## 23 D.T.beetl -0.0215250231 1 0.0215250231 <NA>
## 46 D.T.defens -0.0215250231 1 0.0215250231 <NA>
## 63 D.T.final -0.0215250231 1 0.0215250231 <NA>
## 73 D.T.higher -0.0215250231 1 0.0215250231 <NA>
## 99 D.T.money -0.0215250231 1 0.0215250231 <NA>
## 185 D.npnct18.log -0.0215250231 0 0.0215250231 <NA>
## 25 D.T.bodi -0.0252978602 1 0.0252978602 <NA>
## 115 D.T.pic -0.0292106355 1 0.0292106355 <NA>
## 111 D.T.passcod -0.0303239925 1 0.0303239925 <NA>
## 6 D.P.gold -0.0304491748 1 0.0304491748 <NA>
## 39 D.T.contain -0.0355839439 1 0.0355839439 <NA>
## 42 D.T.correct -0.0373025158 1 0.0373025158 <NA>
## 62 D.T.featur -0.0373025158 1 0.0373025158 <NA>
## 22 D.T.bare -0.0509186819 1 0.0509186819 <NA>
## 20 D.T.averag -0.0555976359 1 0.0555976359 <NA>
## 79 D.T.inspect -0.0568460093 1 0.0568460093 <NA>
## 7 D.P.http NA 1 NA <NA>
## 60 D.T.expect NA 1 NA <NA>
## 80 D.T.intro NA 1 NA <NA>
## 169 D.npnct02.log NA 0 NA <NA>
## 171 D.npnct04.log NA 0 NA <NA>
## 184 D.npnct17.log NA 0 NA <NA>
## 186 D.npnct19.log NA 0 NA <NA>
## 187 D.npnct20.log NA 0 NA <NA>
## 188 D.npnct21.log NA 0 NA <NA>
## 189 D.npnct22.log NA 0 NA <NA>
## 190 D.npnct23.log NA 0 NA <NA>
## 192 D.npnct25.log NA 0 NA <NA>
## 193 D.npnct26.log NA 0 NA <NA>
## 194 D.npnct27.log NA 0 NA <NA>
## 196 D.npnct29.log NA 0 NA <NA>
## 197 D.npnct30.log NA 0 NA <NA>
## freqRatio percentUnique zeroVar nzv myNearZV is.cor.y.abs.low
## 86 925.5000 0.37654653 FALSE TRUE TRUE FALSE
## 155 617.6667 0.21516945 FALSE TRUE TRUE FALSE
## 41 928.5000 0.10758472 FALSE TRUE TRUE FALSE
## 101 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 131 618.3333 0.16137708 FALSE TRUE TRUE FALSE
## 76 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 136 615.0000 0.48413125 FALSE TRUE TRUE FALSE
## 53 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 56 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 58 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 126 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 148 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 174 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 18 618.6667 0.10758472 FALSE TRUE TRUE FALSE
## 65 616.0000 0.37654653 FALSE TRUE TRUE FALSE
## 55 617.6667 0.21516945 FALSE TRUE TRUE FALSE
## 100 618.0000 0.21516945 FALSE TRUE TRUE FALSE
## 114 614.0000 0.59171598 FALSE TRUE TRUE FALSE
## 133 927.0000 0.26896181 FALSE TRUE TRUE FALSE
## 143 928.0000 0.16137708 FALSE TRUE TRUE FALSE
## 139 927.5000 0.21516945 FALSE TRUE TRUE FALSE
## 84 1854.0000 0.32275417 FALSE TRUE TRUE FALSE
## 157 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 141 615.6667 0.37654653 FALSE TRUE TRUE FALSE
## 12 1857.0000 0.16137708 FALSE TRUE TRUE TRUE
## 68 1856.0000 0.21516945 FALSE TRUE TRUE FALSE
## 144 1856.0000 0.21516945 FALSE TRUE TRUE FALSE
## 34 1855.0000 0.26896181 FALSE TRUE TRUE FALSE
## 128 616.0000 0.48413125 FALSE TRUE TRUE FALSE
## 88 1856.0000 0.21516945 FALSE TRUE TRUE FALSE
## 147 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 49 1856.0000 0.21516945 FALSE TRUE TRUE FALSE
## 14 927.5000 0.21516945 FALSE TRUE TRUE FALSE
## 23 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 46 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 63 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 73 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 99 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 185 1858.0000 0.10758472 FALSE TRUE TRUE FALSE
## 25 925.5000 0.32275417 FALSE TRUE TRUE FALSE
## 115 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 111 1857.0000 0.16137708 FALSE TRUE TRUE FALSE
## 6 928.5000 0.10758472 FALSE TRUE TRUE FALSE
## 39 928.0000 0.16137708 FALSE TRUE TRUE FALSE
## 42 618.6667 0.10758472 FALSE TRUE TRUE FALSE
## 62 618.6667 0.10758472 FALSE TRUE TRUE FALSE
## 22 615.3333 0.43033889 FALSE TRUE TRUE FALSE
## 20 617.3333 0.26896181 FALSE TRUE TRUE FALSE
## 79 617.3333 0.21516945 FALSE TRUE TRUE FALSE
## 7 0.0000 0.05379236 TRUE TRUE TRUE NA
## 60 0.0000 0.05379236 TRUE TRUE TRUE NA
## 80 0.0000 0.05379236 TRUE TRUE TRUE NA
## 169 0.0000 0.05379236 TRUE TRUE TRUE NA
## 171 0.0000 0.05379236 TRUE TRUE TRUE NA
## 184 0.0000 0.05379236 TRUE TRUE TRUE NA
## 186 0.0000 0.05379236 TRUE TRUE TRUE NA
## 187 0.0000 0.05379236 TRUE TRUE TRUE NA
## 188 0.0000 0.05379236 TRUE TRUE TRUE NA
## 189 0.0000 0.05379236 TRUE TRUE TRUE NA
## 190 0.0000 0.05379236 TRUE TRUE TRUE NA
## 192 0.0000 0.05379236 TRUE TRUE TRUE NA
## 193 0.0000 0.05379236 TRUE TRUE TRUE NA
## 194 0.0000 0.05379236 TRUE TRUE TRUE NA
## 196 0.0000 0.05379236 TRUE TRUE TRUE NA
## 197 0.0000 0.05379236 TRUE TRUE TRUE NA
# Remove every column whose feature row is flagged myNearZV in glb_feats_df,
# then rebuild the Train / Test views so they carry the same reduced column set.
glb_allobs_df <- glb_allobs_df[, setdiff(names(glb_allobs_df),
                                         subset(glb_feats_df, myNearZV)$id)]
glb_trnobs_df <- subset(glb_allobs_df, .src == "Train")
glb_newobs_df <- subset(glb_allobs_df, .src == "Test")

# For each feature id that appears as a value of glb_interaction_only_features,
# store the name attached to that entry in the "interaction.feat" column.
# The row mask is in glb_feats_df order while the names are in the vector's own
# order, so the replacement values are looked up per selected row with match():
# this keeps them aligned when several entries are present and keeps the
# lengths equal when an id is absent from glb_feats_df. With a single entry the
# result is the same as assigning names(...) directly.
if (!is.null(glb_interaction_only_features)) {
    glb_feats_df[glb_feats_df$id %in% glb_interaction_only_features, "interaction.feat"] <-
        names(glb_interaction_only_features)[
            match(glb_feats_df$id[glb_feats_df$id %in% glb_interaction_only_features],
                  glb_interaction_only_features)]
} else {
    glb_feats_df$interaction.feat <- NA
}

# Report missing / zero / Inf / NaN counts per column (output follows).
# NOTE(review): terminate = TRUE presumably aborts the run on problem data -
# confirm against mycheck_problem_data's definition.
mycheck_problem_data(glb_allobs_df, terminate = TRUE)
## [1] "numeric data missing in : "
## sold sold.fctr
## 798 798
## [1] "numeric data w/ 0s in : "
## biddable sold startprice.log
## 1444 999 31
## cellular.fctr D.terms.n.post.stop D.terms.n.post.stop.log
## 1600 1521 1521
## D.TfIdf.sum.post.stop D.terms.n.post.stem D.terms.n.post.stem.log
## 1521 1521 1521
## D.TfIdf.sum.post.stem D.T.condit D.T.use
## 1521 2158 2366
## D.T.scratch D.T.new D.T.good
## 2371 2501 2460
## D.T.screen D.T.great D.T.ipad
## 2444 2532 2425
## D.T.work D.T.excel D.T.like
## 2459 2557 2584
## D.T.box D.T.function. D.T.item
## 2547 2541 2528
## D.T.fulli D.T.cosmet D.T.minor
## 2569 2540 2540
## D.T.mint D.T.crack D.T.wear
## 2594 2580 2556
## D.T.perfect D.T.includ D.T.lock
## 2602 2574 2614
## D.T.case D.T.icloud D.T.see
## 2575 2601 2604
## D.T.light D.T.devic D.T.pleas
## 2576 2577 2590
## D.T.back D.T.origin D.T.dent
## 2580 2599 2592
## D.T.hous D.T.sign D.T.open
## 2585 2580 2613
## D.T.clean D.T.will D.T.appl
## 2615 2618 2598
## D.T.charger D.T.damag D.T.X100
## 2619 2626 2593
## D.T.come D.T.scuff D.T.corner
## 2602 2615 2612
## D.T.small D.T.broken D.T.descript
## 2611 2637 2624
## D.T.unit D.T.refurbish D.T.show
## 2617 2623 2606
## D.T.read D.T.test D.T.pictur
## 2626 2620 2624
## D.T.brand D.T.list D.T.may
## 2627 2616 2619
## D.T.mark D.T.blemish D.T.packag
## 2629 2625 2631
## D.T.mini D.T.affect D.T.normal
## 2623 2629 2626
## D.T.tab D.T.top D.T.accessori
## 2630 2633 2629
## D.T.ding D.T.near D.T.digit
## 2632 2623 2639
## D.T.tear D.T.display D.T.minim
## 2626 2634 2629
## D.T.wifi D.T.order D.T.protector
## 2632 2636 2639
## D.T.kept D.T.previous D.T.button
## 2637 2634 2638
## D.T.alway D.T.contact D.T.fair
## 2639 2642 2635
## D.T.air D.T.esn D.T.free
## 2636 2641 2638
## D.T.imei D.T.cabl D.T.profession
## 2640 2639 2641
## D.T.overal D.T.retail D.T.refer
## 2643 2648 2646
## D.T.stock D.T.seller D.T.phone
## 2643 2643 2647
## D.T.problem D.T.manufactur D.T.certifi
## 2651 2649 2647
## D.T.ship D.T.heavili D.T.non
## 2646 2646 2649
## D.T.handset D.T.sticker D.T.qualiti
## 2650 2649 2651
## D.T.least D.T.technician D.T.sync
## 2653 2652 2652
## D.nwrds.log D.nwrds.unq.log D.sum.TfIdf
## 1520 1521 1521
## D.ratio.sum.TfIdf.nwrds D.nchrs.log D.nuppr.log
## 1521 1520 1522
## D.ndgts.log D.npnct01.log D.npnct03.log
## 2427 2579 2614
## D.npnct05.log D.npnct06.log D.npnct08.log
## 2592 2554 2581
## D.npnct09.log D.npnct10.log D.npnct11.log
## 2641 2648 2301
## D.npnct12.log D.npnct13.log D.npnct14.log
## 2538 1932 2582
## D.npnct15.log D.npnct16.log D.npnct24.log
## 2637 2546 1520
## D.npnct28.log D.nstopwrds.log D.P.mini
## 2649 1664 2623
## D.P.air D.P.black D.P.white
## 2636 2640 2647
## D.P.spacegray
## 2650
## [1] "numeric data w/ Infs in : "
## named integer(0)
## [1] "numeric data w/ NaNs in : "
## named integer(0)
## [1] "string data missing in : "
## description condition cellular carrier color storage
## 1520 0 0 0 0 0
## productline .grpid prdline.my descr.my
## 0 NA 0 1520
# NOTE(review): the three commented-out lines below reference Married.fctr,
# which does not appear among the columns reported by mycheck_problem_data
# above - presumably left over from a different dataset; confirm before
# re-enabling.
# glb_allobs_df %>% filter(is.na(Married.fctr)) %>% tbl_df()
# glb_allobs_df %>% count(Married.fctr)
# levels(glb_allobs_df$Married.fctr)
# Open the "partition.data.training" entry in the glb_chunks_df log; with
# major.inc=TRUE the step_major value advances (5 -> 6 in the output below).
glb_chunks_df <- myadd_chunk(glb_chunks_df, "partition.data.training", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 8 select.features 5 0 60.320 66.001 5.681
## 9 partition.data.training 6 0 66.001 NA NA
6.0: partition data trainingif (all(is.na(glb_newobs_df[, glb_rsp_var]))) {
# NOTE(review): the "6.0: partition data training" text fused onto the line
# above is the rendered section heading, not R code; the statement proper
# starts at `if (`.
#
# Purpose: split glb_trnobs_df into a fit set (glb_fitobs_df) and an
# out-of-bag set (glb_OOBobs_df). This branch runs when the response is NA for
# every row of glb_newobs_df, so the OOB set has to be carved out of the
# training rows.
set.seed(glb_split_sample.seed)
# Target OOB size: 1.1 times the number of new observations.
OOB_size <- nrow(glb_newobs_df) * 1.1
if (is.null(glb_category_var)) {
# No category variable: split on the raw response alone with
# caTools::sample.split; rows marked TRUE go to OOB, the rest to fit.
require(caTools)
split <- sample.split(glb_trnobs_df[, glb_rsp_var_raw],
SplitRatio=OOB_size / nrow(glb_trnobs_df))
glb_OOBobs_df <- glb_trnobs_df[split ,]
glb_fitobs_df <- glb_trnobs_df[!split, ]
} else {
# Category variable given: sample within (response, category) strata.
sample_vars <- c(glb_rsp_var_raw, glb_category_var)
# Response-value counts in the training rows (column .n), ordered by response.
rspvar_freq_df <- orderBy(reformulate(glb_rsp_var_raw),
mycreate_sqlxtab_df(glb_trnobs_df, glb_rsp_var_raw))
# OOB_size apportioned across response values by their training frequency.
OOB_rspvar_size <- 1.0 * OOB_size * rspvar_freq_df$.n / sum(rspvar_freq_df$.n)
# Category counts in the new and the training rows, each ordered by category.
newobs_freq_df <- orderBy(reformulate(glb_category_var),
mycreate_sqlxtab_df(glb_newobs_df, glb_category_var))
trnobs_freq_df <- orderBy(reformulate(glb_category_var),
mycreate_sqlxtab_df(glb_trnobs_df, glb_category_var))
# Full outer join on category (counts become .n.Tst / .n.Train); a category
# missing on one side gets a count of 0.
allobs_freq_df <- merge(newobs_freq_df, trnobs_freq_df, by=glb_category_var,
all=TRUE, sort=TRUE, suffixes=c(".Tst", ".Train"))
allobs_freq_df[is.na(allobs_freq_df)] <- 0
# Product of (category share in the new data, as a column) and (per-response
# OOB size, as a row), flattened column by column - i.e. all categories for
# the first response value, then all categories for the next - and rounded up.
OOB_strata_size <- ceiling(
as.vector(matrix(allobs_freq_df$.n.Tst * 1.0 / sum(allobs_freq_df$.n.Tst)) %*%
matrix(OOB_rspvar_size, nrow=1)))
# A stratum sized 0 is bumped to 1.
OOB_strata_size[OOB_strata_size == 0] <- 1
# Every (response, category) combination; the placeholder column names given
# to expand.grid are replaced by the real variable names on the next line.
OOB_strata_df <- expand.grid(glb_rsp_var_raw=rspvar_freq_df[, glb_rsp_var_raw],
glb_category_var=allobs_freq_df[, glb_category_var])
names(OOB_strata_df) <- sample_vars
OOB_strata_df <- orderBy(reformulate(sample_vars), OOB_strata_df)
# Combinations actually observed in training, outer-joined with the full grid:
# a combination absent from training ends up with .n == NA.
trnobs_univ_df <- orderBy(reformulate(sample_vars),
mycreate_sqlxtab_df(glb_trnobs_df, sample_vars))
trnobs_univ_df <- merge(trnobs_univ_df, OOB_strata_df, all=TRUE)
# Training rows sorted by the strata variables before sampling.
# NOTE(review): presumably required so the size vector lines up with the
# strata order used by strata() - confirm against the sampling package docs.
tmp_trnobs_df <- orderBy(reformulate(c(glb_rsp_var_raw, glb_category_var)),
glb_trnobs_df)
require(sampling)
# Only the sizes of strata that exist in training are passed on; method
# "srswor" is the sampling package's simple random sampling without
# replacement.
split_strata <- strata(tmp_trnobs_df,
stratanames=c(glb_rsp_var_raw, glb_category_var),
size=OOB_strata_size[!is.na(trnobs_univ_df$.n)],
method="srswor")
# Sampled rows become OOB (restricted to the original training columns);
# every training row whose id is not in OOB becomes a fit row.
glb_OOBobs_df <- getdata(tmp_trnobs_df, split_strata)[, names(glb_trnobs_df)]
glb_fitobs_df <- glb_trnobs_df[!glb_trnobs_df[, glb_id_var] %in%
glb_OOBobs_df[, glb_id_var], ]
}
} else {
# The new data already carries response values: fit on all training rows and
# use the new data itself as the OOB set.
print(sprintf("Newdata contains non-NA data for %s; setting OOB to Newdata",
glb_rsp_var))
glb_fitobs_df <- glb_trnobs_df; glb_OOBobs_df <- glb_newobs_df
}
## Loading required package: sampling
##
## Attaching package: 'sampling'
##
## The following objects are masked from 'package:survival':
##
## cluster, strata
##
## The following object is masked from 'package:caret':
##
## cluster
# Cap the fit set: when glb_max_fitobs is set and glb_fitobs_df has more rows
# than that, warn and keep only the rows selected by sample.split() on the raw
# response with SplitRatio = glb_max_fitobs.
if (!is.null(glb_max_fitobs) && (nrow(glb_fitobs_df) > glb_max_fitobs)) {
    warning("glb_fitobs_df restricted to glb_max_fitobs: ",
            format(glb_max_fitobs, big.mark=","))
    org_fitobs_df <- glb_fitobs_df
    # Row-selection mask over the unrestricted fit set; stored in `split`.
    split <- sample.split(org_fitobs_df[, glb_rsp_var_raw],
                          SplitRatio=glb_max_fitobs)
    glb_fitobs_df <- org_fitobs_df[split, ]
    # Release the unrestricted copy.
    org_fitobs_df <- NULL
}
# Tag each observation with the partition it landed in via a ".lcn" column:
# "" by default, "Fit" when its id is in glb_fitobs_df, "OOB" when its id is in
# glb_OOBobs_df. "OOB" is written after "Fit", so it wins if an id is in both.

# All observations
glb_allobs_df$.lcn <- ""
glb_allobs_df$.lcn[glb_allobs_df[, glb_id_var] %in%
                       glb_fitobs_df[, glb_id_var]] <- "Fit"
glb_allobs_df$.lcn[glb_allobs_df[, glb_id_var] %in%
                       glb_OOBobs_df[, glb_id_var]] <- "OOB"

# Training observations
glb_trnobs_df$.lcn <- ""
glb_trnobs_df$.lcn[glb_trnobs_df[, glb_id_var] %in%
                       glb_fitobs_df[, glb_id_var]] <- "Fit"
glb_trnobs_df$.lcn[glb_trnobs_df[, glb_id_var] %in%
                       glb_OOBobs_df[, glb_id_var]] <- "OOB"
# Print the distribution of `partition_var` within each value of `location_var`.
#
# Args:
#   obs_df:        observations to cross-tabulate.
#   location_var:  name of the column whose values become the row names.
#   partition_var: name of the column whose values are counted per row.
#
# Prints the count table, then the same table with every row divided by its
# row total (NAs ignored in the total). Returns the value of the last print()
# call, i.e. the row-proportion table, invisibly.
dsp_class_dstrb <- function(obs_df, location_var, partition_var) {
    xtab_df <- mycreate_xtab_df(obs_df, c(location_var, partition_var))
    rownames(xtab_df) <- xtab_df[, location_var]
    # Drop the location column by exact name. Negating grepl()'s logical result
    # would yield 0 / -1 indices, which always removes column 1 no matter where
    # the location column sits, and would treat the name as a regex.
    # drop=FALSE keeps a data frame even if a single count column remains.
    xtab_df <- xtab_df[, names(xtab_df) != location_var, drop=FALSE]
    print(xtab_df)
    print(xtab_df / rowSums(xtab_df, na.rm=TRUE))
}
# Sanity-check the split: compare how the OOB rows and the new (Test) rows are
# distributed over glb_category_var, and for classification also show the raw
# response distribution per .lcn partition.
if (!is.null(glb_category_var)) {
    if (glb_is_classification) {
        dsp_class_dstrb(glb_allobs_df, ".lcn", glb_rsp_var_raw)
    }

    # Per-category row counts for the Test rows and for the OOB rows.
    newobs_ctgry_df <- mycreate_sqlxtab_df(subset(glb_allobs_df, .src == "Test"),
                                           glb_category_var)
    OOBobs_ctgry_df <- mycreate_sqlxtab_df(subset(glb_allobs_df, .lcn == "OOB"),
                                           glb_category_var)

    # Side-by-side counts (.n.Tst / .n.OOB), keeping categories seen on either side.
    glb_ctgry_df <- merge(newobs_ctgry_df, OOBobs_ctgry_df,
                          by=glb_category_var, all=TRUE,
                          suffixes=c(".Tst", ".OOB"))

    # Each category's share of its own set.
    glb_ctgry_df$.freqRatio.Tst <- with(glb_ctgry_df,
                                        .n.Tst / sum(.n.Tst, na.rm=TRUE))
    glb_ctgry_df$.freqRatio.OOB <- with(glb_ctgry_df,
                                        .n.OOB / sum(.n.OOB, na.rm=TRUE))

    # Largest Test share first, ties broken by largest OOB share.
    print(orderBy(~-.freqRatio.Tst-.freqRatio.OOB, glb_ctgry_df))
}
## sold.0 sold.1 sold.NA
## NA NA 798
## Fit 522 447 NA
## OOB 477 413 NA
## sold.0 sold.1 sold.NA
## NA NA 1
## Fit 0.5386997 0.4613003 NA
## OOB 0.5359551 0.4640449 NA
## prdl.my.descr.fctr .n.Tst .n.OOB .freqRatio.Tst .freqRatio.OOB
## 9 iPadAir#0 88 98 0.11027569 0.11011236
## 5 iPad 2#0 83 93 0.10401003 0.10449438
## 6 iPad 2#1 71 79 0.08897243 0.08876404
## 13 iPadmini#0 65 73 0.08145363 0.08202247
## 8 iPad 3+#1 64 71 0.08020050 0.07977528
## 11 iPadmini 2+#0 64 71 0.08020050 0.07977528
## 7 iPad 3+#0 59 66 0.07393484 0.07415730
## 10 iPadAir#1 49 54 0.06140351 0.06067416
## 14 iPadmini#1 49 54 0.06140351 0.06067416
## 3 iPad 1#0 46 52 0.05764411 0.05842697
## 1 Unknown#0 45 50 0.05639098 0.05617978
## 4 iPad 1#1 43 48 0.05388471 0.05393258
## 2 Unknown#1 42 47 0.05263158 0.05280899
## 12 iPadmini 2+#1 30 34 0.03759398 0.03820225
# Run this line by line
# Add bookkeeping flags to glb_feats_df (rsp_var_raw, id_var) and build the
# one-row data frame describing the response feature glb_rsp_var.
print("glb_feats_df:"); print(dim(glb_feats_df))
## [1] "glb_feats_df:"
## [1] 224 12
# Snapshot followed by an immediate restore: a no-op in a straight run.
# NOTE(review): presumably lets the steps below be re-run interactively from
# the saved copy - confirm intent.
sav_feats_df <- glb_feats_df
glb_feats_df <- sav_feats_df
# rsp_var_raw is TRUE only on the row whose id is the raw response variable.
glb_feats_df[, "rsp_var_raw"] <- FALSE
glb_feats_df[glb_feats_df$id == glb_rsp_var_raw, "rsp_var_raw"] <- TRUE
# Convert exclude.as.feat from its 0/1 encoding to logical.
glb_feats_df$exclude.as.feat <- (glb_feats_df$exclude.as.feat == 1)
# id_var is only created when an id variable other than ".rownames" is
# configured; it is TRUE on the id row(s) and NA elsewhere.
if (!is.null(glb_id_var) && glb_id_var != ".rownames")
glb_feats_df[glb_feats_df$id %in% glb_id_var, "id_var"] <- TRUE
# Feature row for the response variable, always excluded as a feature.
add_feats_df <- data.frame(id=glb_rsp_var, exclude.as.feat=TRUE, rsp_var=TRUE)
row.names(add_feats_df) <- add_feats_df$id; print(add_feats_df)
## id exclude.as.feat rsp_var
## sold.fctr sold.fctr TRUE TRUE
# Append the response-variable row to the feature table, then show the rows
# that describe the raw response, the response and (when one is configured)
# the id variable.
glb_feats_df <- myrbind_df(glb_feats_df, add_feats_df)
# Same guard as the statement that creates the id_var column: a NULL
# glb_id_var would otherwise make `if` fail with "argument is of length zero",
# and the id_var column only exists when this condition holds.
if (!is.null(glb_id_var) && glb_id_var != ".rownames") {
    print(subset(glb_feats_df, rsp_var_raw | rsp_var | id_var))
} else {
    print(subset(glb_feats_df, rsp_var_raw | rsp_var))
}
## id cor.y exclude.as.feat cor.y.abs cor.high.X
## 219 sold 1.0000000 TRUE 1.0000000 <NA>
## 210 UniqueID -0.1895466 TRUE 0.1895466 <NA>
## sold.fctr sold.fctr NA TRUE NA <NA>
## freqRatio percentUnique zeroVar nzv myNearZV is.cor.y.abs.low
## 219 1.161628 0.1075847 FALSE FALSE FALSE FALSE
## 210 1.000000 100.0000000 FALSE FALSE FALSE FALSE
## sold.fctr NA NA NA NA NA NA
## interaction.feat rsp_var_raw id_var rsp_var
## 219 <NA> TRUE NA NA
## 210 <NA> FALSE TRUE NA
## sold.fctr <NA> NA NA TRUE
# Feature ids that no longer have a column in glb_allobs_df. The 65 ids printed
# below are the features listed earlier as myNearZV, whose columns were dropped
# from glb_allobs_df.
print("glb_feats_df vs. glb_allobs_df: ");
## [1] "glb_feats_df vs. glb_allobs_df: "
print(setdiff(glb_feats_df$id, names(glb_allobs_df)))
## [1] "D.T.left" "D.T.two" "D.T.corpor" "D.T.name"
## [5] "D.T.scroll" "D.T.imag" "D.T.shape" "D.T.disclaim"
## [9] "D.T.els" "D.T.essenti" "D.T.repeat." "D.T.super"
## [13] "D.npnct07.log" "D.T.anoth" "D.T.full" "D.T.edg"
## [17] "D.T.must" "D.T.photo" "D.T.seal" "D.T.speaker"
## [21] "D.T.side" "D.T.keyboard" "D.T.upper" "D.T.sinc"
## [25] "D.T.X2016" "D.T.geek" "D.T.squad" "D.T.chip"
## [29] "D.T.right" "D.T.lightn" "D.T.stylus" "D.T.detail"
## [33] "D.T.activ" "D.T.beetl" "D.T.defens" "D.T.final"
## [37] "D.T.higher" "D.T.money" "D.npnct18.log" "D.T.bodi"
## [41] "D.T.pic" "D.T.passcod" "D.P.gold" "D.T.contain"
## [45] "D.T.correct" "D.T.featur" "D.T.bare" "D.T.averag"
## [49] "D.T.inspect" "D.P.http" "D.T.expect" "D.T.intro"
## [53] "D.npnct02.log" "D.npnct04.log" "D.npnct17.log" "D.npnct19.log"
## [57] "D.npnct20.log" "D.npnct21.log" "D.npnct22.log" "D.npnct23.log"
## [61] "D.npnct25.log" "D.npnct26.log" "D.npnct27.log" "D.npnct29.log"
## [65] "D.npnct30.log"
print("glb_allobs_df vs. glb_feats_df: ");
## [1] "glb_allobs_df vs. glb_feats_df: "
# Ensure these are only chr vars
print(setdiff(setdiff(names(glb_allobs_df), glb_feats_df$id),
myfind_chr_cols_df(glb_allobs_df)))
## character(0)
#print(setdiff(setdiff(names(glb_allobs_df), glb_exclude_vars_as_features),
# glb_feats_df$id))
# Print the dimensions (rows, columns) of the combined observations data frame
# and of each derived data frame (presumably the training / fit / out-of-bag /
# new-observation partitions, going by their names -- confirm).
print("glb_allobs_df: "); print(dim(glb_allobs_df))
## [1] "glb_allobs_df: "
## [1] 2657 172
print("glb_trnobs_df: "); print(dim(glb_trnobs_df))
## [1] "glb_trnobs_df: "
## [1] 1859 172
print("glb_fitobs_df: "); print(dim(glb_fitobs_df))
## [1] "glb_fitobs_df: "
## [1] 969 171
print("glb_OOBobs_df: "); print(dim(glb_OOBobs_df))
## [1] "glb_OOBobs_df: "
## [1] 890 171
print("glb_newobs_df: "); print(dim(glb_newobs_df))
## [1] "glb_newobs_df: "
## [1] 798 171
# # Does not handle NULL or length(glb_id_var) > 1
# Optionally checkpoint the feature catalogue and the combined observations to
# disk. The partitioned data frames (glb_trnobs_df, glb_fitobs_df,
# glb_OOBobs_df, glb_newobs_df) are currently left out of the snapshot.
if (glb_save_envir) {
    save(glb_feats_df, glb_allobs_df,
         file = paste0(glb_out_pfx, "blddfs_dsk.RData"))
}
# load(paste0(glb_out_pfx, "blddfs_dsk.RData"))
# if (!all.equal(tmp_feats_df, glb_feats_df))
# stop("glb_feats_df r/w not working")
# if (!all.equal(tmp_entity_df, glb_allobs_df))
# stop("glb_allobs_df r/w not working")
# Remove the temporary `split` object only when one actually exists in the
# current environment; the unconditional rm(split) emitted
# "Warning in rm(split): object 'split' not found" when nothing had created it.
# inherits = FALSE stops base::split() from satisfying the existence check.
if (exists("split", inherits = FALSE)) rm(split)
# Register the start of the "fit.models" step in the chunk log glb_chunks_df.
# The printed log shows it as step_major 7, following partition.data.training
# at step_major 6 (presumably the effect of major.inc=TRUE -- confirm against
# myadd_chunk).
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 9 partition.data.training 6 0 66.001 67.215 1.214
## 10 fit.models 7 0 67.216 NA NA
7.0: fit models
# load(paste0(glb_out_pfx, "dsk.RData"))
# A binomial classification needs at least two distinct response values in the
# fit data; abort early when that is not the case.
if (glb_is_classification && glb_is_binomial) {
    if (length(unique(glb_fitobs_df[, glb_rsp_var])) < 2) {
        stop("glb_fitobs_df$", glb_rsp_var,
             ": contains less than 2 unique values: ",
             paste0(unique(glb_fitobs_df[, glb_rsp_var]), collapse=", "))
    }
}
# Pick the ids of the two features with the largest absolute correlation to the
# response (ordered by descending cor.y.abs) among the features that are not
# excluded (exclude.as.feat == 0), not flagged is.cor.y.abs.low, and have no
# entry in cor.high.X.
# NOTE(review): [1:2, "id"] pads the result with NA when fewer than two
# features qualify; the disabled loop below looks like an earlier attempt to
# top the vector up -- confirm whether downstream calls tolerate NA here.
max_cor_y_x_vars <- orderBy(~ -cor.y.abs,
subset(glb_feats_df, (exclude.as.feat == 0) & !is.cor.y.abs.low &
is.na(cor.high.X)))[1:2, "id"]
# while(length(max_cor_y_x_vars) < 2) {
# max_cor_y_x_vars <- c(max_cor_y_x_vars, orderBy(~ -cor.y.abs,
# subset(glb_feats_df, (exclude.as.feat == 0) & !is.cor.y.abs.low))[3, "id"])
# }
# Sanity-check the configured Baseline feature: stop when the top-ranked
# candidate feature is a different variable whose absolute correlation with the
# response exceeds that of the Baseline feature.
if (!is.null(glb_Baseline_mdl_var)) {
    # Fail with a clear message when the Baseline var is absent from the
    # feature catalogue; the cor.y.abs lookup below would otherwise return a
    # zero-length value and make if() abort with "argument is of length zero".
    if (!all(glb_Baseline_mdl_var %in% glb_feats_df$id))
        stop("Baseline var: ", glb_Baseline_mdl_var,
             " not found in glb_feats_df$id")
    # Use the scalar, short-circuiting && inside if(); the vectorised & is meant
    # for element-wise logic and always evaluates both operands.
    if ((max_cor_y_x_vars[1] != glb_Baseline_mdl_var) &&
        (glb_feats_df[glb_feats_df$id == max_cor_y_x_vars[1], "cor.y.abs"] >
         glb_feats_df[glb_feats_df$id == glb_Baseline_mdl_var, "cor.y.abs"]))
        stop(max_cor_y_x_vars[1], " has a higher correlation with ", glb_rsp_var,
             " than the Baseline var: ", glb_Baseline_mdl_var)
}
# Label the problem type once; the model-fitting calls further down pass it on
# as model_type.
glb_model_type <- ifelse(glb_is_regression, "regression", "classification")

# Baseline model: fitted only when a Baseline feature has been configured.
if (!is.null(glb_Baseline_mdl_var)) {
    ret_lst <- myfit_mdl(model_id = "Baseline",
                         model_method = "mybaseln_classfr",
                         indep_vars_vctr = glb_Baseline_mdl_var,
                         rsp_var = glb_rsp_var,
                         rsp_var_out = glb_rsp_var_out,
                         fit_df = glb_fitobs_df,
                         OOB_df = glb_OOBobs_df)
}
# "MFO" (Most Frequent Outcome) model; for regression this is mean(y), fitted
# through lm.
# caret's nullModel is not used because its model stats are not available.
# rpart cannot stand in for multinomial classification since it predicts
# non-MFO.
ret_lst <- myfit_mdl(
    model_id = "MFO",
    model_method = ifelse(glb_is_regression, "lm", "myMFO_classfr"),
    model_type = glb_model_type,
    indep_vars_vctr = ".rnorm",
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_fitobs_df,
    OOB_df = glb_OOBobs_df
)
## [1] "fitting model: MFO.myMFO_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## [1] "in MFO.Classifier$fit"
## [1] "unique.vals:"
## [1] N Y
## Levels: N Y
## [1] "unique.prob:"
## y
## N Y
## 0.5386997 0.4613003
## [1] "MFO.val:"
## [1] "N"
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 -none- numeric
## MFO.val 1 -none- character
## x.names 1 -none- character
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## Loading required package: ROCR
## Loading required package: gplots
##
## Attaching package: 'gplots'
##
## The following object is masked from 'package:stats':
##
## lowess
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.5386997 0.4613003
## 2 0.5386997 0.4613003
## 3 0.5386997 0.4613003
## 4 0.5386997 0.4613003
## 5 0.5386997 0.4613003
## 6 0.5386997 0.4613003
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.MFO.myMFO_classfr.N
## 1 N 522
## 2 Y 447
## Prediction
## Reference N Y
## N 522 0
## Y 447 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.386997e-01 0.000000e+00 5.067192e-01 5.704443e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 5.131824e-01 8.826336e-99
## [1] " calling mypredict_mdl for OOB:"
## [1] "in MFO.Classifier$prob"
## N Y
## 1 0.5386997 0.4613003
## 2 0.5386997 0.4613003
## 3 0.5386997 0.4613003
## 4 0.5386997 0.4613003
## 5 0.5386997 0.4613003
## 6 0.5386997 0.4613003
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.MFO.myMFO_classfr.N
## 1 N 477
## 2 Y 413
## Prediction
## Reference N Y
## N 477 0
## Y 413 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.359551e-01 0.000000e+00 5.025561e-01 5.691153e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 5.137245e-01 2.217817e-91
## model_id model_method feats max.nTuningRuns
## 1 MFO.myMFO_classfr myMFO_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.367 0.003 0.5
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0 0.5386997
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.5067192 0.5704443 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.5359551
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.5025561 0.5691153 0
# "Random" model: classification only. Regression does not need one because it
# would be the same as the MFO model.
if (glb_is_classification) {
    ret_lst <- myfit_mdl(
        model_id = "Random",
        model_method = "myrandom_classfr",
        model_type = glb_model_type,
        indep_vars_vctr = ".rnorm",
        rsp_var = glb_rsp_var,
        rsp_var_out = glb_rsp_var_out,
        fit_df = glb_fitobs_df,
        OOB_df = glb_OOBobs_df
    )
}
## [1] "fitting model: Random.myrandom_classfr"
## [1] " indep_vars: .rnorm"
## Fitting parameter = none on full training set
## Length Class Mode
## unique.vals 2 factor numeric
## unique.prob 2 table numeric
## xNames 1 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## [1] "in Random.Classifier$prob"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6313559
## 3 0.2 0.6313559
## 4 0.3 0.6313559
## 5 0.4 0.6313559
## 6 0.5 0.4611973
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Random.myrandom_classfr.Y
## 1 N 522
## 2 Y 447
## Prediction
## Reference N Y
## N 0 522
## Y 0 447
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.613003e-01 0.000000e+00 4.295557e-01 4.932808e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 9.999994e-01 4.227904e-115
## [1] " calling mypredict_mdl for OOB:"
## [1] "in Random.Classifier$prob"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6339217
## 3 0.2 0.6339217
## 4 0.3 0.6339217
## 5 0.4 0.6339217
## 6 0.5 0.4817518
## 7 0.6 0.0000000
## 8 0.7 0.0000000
## 9 0.8 0.0000000
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Random.myrandom_classfr.Y
## 1 N 477
## 2 Y 413
## Prediction
## Reference N Y
## N 0 477
## Y 0 413
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 4.640449e-01 0.000000e+00 4.308847e-01 4.974439e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 9.999925e-01 2.613895e-105
## model_id model_method feats max.nTuningRuns
## 1 Random.myrandom_classfr myrandom_classfr .rnorm 0
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.257 0.002 0.4960722
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.4 0.6313559 0.4613003
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.4295557 0.4932808 0 0.5185354
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.4 0.6339217 0.4640449
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.4308847 0.4974439 0
# Models that have tuning parameters get "better" results with
# cross-validation (rf excepted) and "different" results depending on the
# outcome metric.
# Max.cor.Y: check the impact of cv.
# rpart is not a good candidate here because caret does not optimize cp (the
# only tuning parameter of rpart) well.
ret_lst <- myfit_mdl(
    model_id = "Max.cor.Y.cv.0",
    model_method = "rpart",
    model_type = glb_model_type,
    indep_vars_vctr = max_cor_y_x_vars,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_fitobs_df,
    OOB_df = glb_OOBobs_df
)
## [1] "fitting model: Max.cor.Y.cv.0.rpart"
## [1] " indep_vars: biddable, startprice.diff"
## Loading required package: rpart
## Fitting cp = 0.528 on full training set
## Loading required package: rpart.plot
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 969
##
## CP nsplit rel error
## 1 0.5279642 0 1
##
## Node number 1: 969 observations
## predicted class=N expected loss=0.4613003 P(node) =1
## class counts: 522 447
## probabilities: 0.539 0.461
##
## n= 969
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 969 447 N (0.5386997 0.4613003) *
## [1] " calling mypredict_mdl for fit:"
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 522
## 2 Y 447
## Prediction
## Reference N Y
## N 522 0
## Y 447 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.386997e-01 0.000000e+00 5.067192e-01 5.704443e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 5.131824e-01 8.826336e-99
## [1] " calling mypredict_mdl for OOB:"
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.rpart.N
## 1 N 477
## 2 Y 413
## Prediction
## Reference N Y
## N 477 0
## Y 413 0
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 5.359551e-01 0.000000e+00 5.025561e-01 5.691153e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 5.137245e-01 2.217817e-91
## model_id model_method feats
## 1 Max.cor.Y.cv.0.rpart rpart biddable, startprice.diff
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 0.598 0.012
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.5 0.5 0 0.5386997
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.5067192 0.5704443 0 0.5
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0 0.5359551
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.5025561 0.5691153 0
# Same two predictors, no cross-validation folds, and the rpart complexity
# parameter cp pinned to 0 through a single-value tuning range (min = max = 0).
ret_lst <- myfit_mdl(
    model_id = "Max.cor.Y.cv.0.cp.0",
    model_method = "rpart",
    model_type = glb_model_type,
    indep_vars_vctr = max_cor_y_x_vars,
    rsp_var = glb_rsp_var,
    rsp_var_out = glb_rsp_var_out,
    fit_df = glb_fitobs_df,
    OOB_df = glb_OOBobs_df,
    n_cv_folds = 0,
    tune_models_df = data.frame(parameter = "cp", min = 0.0, max = 0.0, by = 0.1)
)
## [1] "fitting model: Max.cor.Y.cv.0.cp.0.rpart"
## [1] " indep_vars: biddable, startprice.diff"
## Fitting cp = 0 on full training set
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 969
##
## CP nsplit rel error
## 1 0.5279642058 0 1.0000000
## 2 0.1342281879 1 0.4720358
## 3 0.0041946309 2 0.3378076
## 4 0.0033557047 11 0.2997763
## 5 0.0022371365 13 0.2930649
## 6 0.0011185682 15 0.2885906
## 7 0.0005592841 21 0.2796421
## 8 0.0000000000 25 0.2774049
##
## Variable importance
## biddable startprice.diff
## 52 48
##
## Node number 1: 969 observations, complexity param=0.5279642
## predicted class=N expected loss=0.4613003 P(node) =1
## class counts: 522 447
## probabilities: 0.539 0.461
## left son=2 (539 obs) right son=3 (430 obs)
## Primary splits:
## biddable < 0.5 to the left, improve=151.58290, (0 missing)
## startprice.diff < 62.89456 to the right, improve= 82.96307, (0 missing)
## Surrogate splits:
## startprice.diff < 250.1071 to the left, agree=0.562, adj=0.014, (0 split)
##
## Node number 2: 539 observations, complexity param=0.004194631
## predicted class=N expected loss=0.2115028 P(node) =0.5562436
## class counts: 425 114
## probabilities: 0.788 0.212
## left son=4 (167 obs) right son=5 (372 obs)
## Primary splits:
## startprice.diff < 40.80806 to the right, improve=13.91796, (0 missing)
##
## Node number 3: 430 observations, complexity param=0.1342282
## predicted class=Y expected loss=0.2255814 P(node) =0.4437564
## class counts: 97 333
## probabilities: 0.226 0.774
## left son=6 (80 obs) right son=7 (350 obs)
## Primary splits:
## startprice.diff < 63.51092 to the right, improve=82.90292, (0 missing)
##
## Node number 4: 167 observations
## predicted class=N expected loss=0.04191617 P(node) =0.1723426
## class counts: 160 7
## probabilities: 0.958 0.042
##
## Node number 5: 372 observations, complexity param=0.004194631
## predicted class=N expected loss=0.2876344 P(node) =0.3839009
## class counts: 265 107
## probabilities: 0.712 0.288
## left son=10 (128 obs) right son=11 (244 obs)
## Primary splits:
## startprice.diff < -35.3304 to the left, improve=13.51309, (0 missing)
##
## Node number 6: 80 observations
## predicted class=N expected loss=0.125 P(node) =0.08255934
## class counts: 70 10
## probabilities: 0.875 0.125
##
## Node number 7: 350 observations, complexity param=0.003355705
## predicted class=Y expected loss=0.07714286 P(node) =0.3611971
## class counts: 27 323
## probabilities: 0.077 0.923
## left son=14 (24 obs) right son=15 (326 obs)
## Primary splits:
## startprice.diff < 44.72834 to the right, improve=3.382343, (0 missing)
##
## Node number 10: 128 observations
## predicted class=N expected loss=0.1015625 P(node) =0.1320949
## class counts: 115 13
## probabilities: 0.898 0.102
##
## Node number 11: 244 observations, complexity param=0.004194631
## predicted class=N expected loss=0.3852459 P(node) =0.251806
## class counts: 150 94
## probabilities: 0.615 0.385
## left son=22 (65 obs) right son=23 (179 obs)
## Primary splits:
## startprice.diff < 20.77843 to the right, improve=2.079314, (0 missing)
##
## Node number 14: 24 observations, complexity param=0.003355705
## predicted class=Y expected loss=0.3333333 P(node) =0.0247678
## class counts: 8 16
## probabilities: 0.333 0.667
## left son=28 (7 obs) right son=29 (17 obs)
## Primary splits:
## startprice.diff < 47.96221 to the left, improve=2.868347, (0 missing)
##
## Node number 15: 326 observations
## predicted class=Y expected loss=0.05828221 P(node) =0.3364293
## class counts: 19 307
## probabilities: 0.058 0.942
##
## Node number 22: 65 observations, complexity param=0.002237136
## predicted class=N expected loss=0.2769231 P(node) =0.06707946
## class counts: 47 18
## probabilities: 0.723 0.277
## left son=44 (58 obs) right son=45 (7 obs)
## Primary splits:
## startprice.diff < 37.95043 to the left, improve=1.360818, (0 missing)
##
## Node number 23: 179 observations, complexity param=0.004194631
## predicted class=N expected loss=0.424581 P(node) =0.1847265
## class counts: 103 76
## probabilities: 0.575 0.425
## left son=46 (168 obs) right son=47 (11 obs)
## Primary splits:
## startprice.diff < -31.00081 to the right, improve=1.051349, (0 missing)
##
## Node number 28: 7 observations
## predicted class=N expected loss=0.2857143 P(node) =0.007223942
## class counts: 5 2
## probabilities: 0.714 0.286
##
## Node number 29: 17 observations
## predicted class=Y expected loss=0.1764706 P(node) =0.01754386
## class counts: 3 14
## probabilities: 0.176 0.824
##
## Node number 44: 58 observations, complexity param=0.0005592841
## predicted class=N expected loss=0.2413793 P(node) =0.05985552
## class counts: 44 14
## probabilities: 0.759 0.241
## left son=88 (9 obs) right son=89 (49 obs)
## Primary splits:
## startprice.diff < 36.35647 to the right, improve=1.241379, (0 missing)
##
## Node number 45: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.007223942
## class counts: 3 4
## probabilities: 0.429 0.571
##
## Node number 46: 168 observations, complexity param=0.004194631
## predicted class=N expected loss=0.4107143 P(node) =0.1733746
## class counts: 99 69
## probabilities: 0.589 0.411
## left son=92 (15 obs) right son=93 (153 obs)
## Primary splits:
## startprice.diff < -21.8033 to the left, improve=2.5345, (0 missing)
##
## Node number 47: 11 observations
## predicted class=Y expected loss=0.3636364 P(node) =0.01135191
## class counts: 4 7
## probabilities: 0.364 0.636
##
## Node number 88: 9 observations
## predicted class=N expected loss=0 P(node) =0.009287926
## class counts: 9 0
## probabilities: 1.000 0.000
##
## Node number 89: 49 observations, complexity param=0.0005592841
## predicted class=N expected loss=0.2857143 P(node) =0.0505676
## class counts: 35 14
## probabilities: 0.714 0.286
## left son=178 (8 obs) right son=179 (41 obs)
## Primary splits:
## startprice.diff < 24.4582 to the left, improve=0.4939024, (0 missing)
##
## Node number 92: 15 observations
## predicted class=N expected loss=0.1333333 P(node) =0.01547988
## class counts: 13 2
## probabilities: 0.867 0.133
##
## Node number 93: 153 observations, complexity param=0.004194631
## predicted class=N expected loss=0.4379085 P(node) =0.1578947
## class counts: 86 67
## probabilities: 0.562 0.438
## left son=186 (129 obs) right son=187 (24 obs)
## Primary splits:
## startprice.diff < -15.07226 to the right, improve=1.203982, (0 missing)
##
## Node number 178: 8 observations
## predicted class=N expected loss=0.125 P(node) =0.008255934
## class counts: 7 1
## probabilities: 0.875 0.125
##
## Node number 179: 41 observations, complexity param=0.0005592841
## predicted class=N expected loss=0.3170732 P(node) =0.04231166
## class counts: 28 13
## probabilities: 0.683 0.317
## left son=358 (18 obs) right son=359 (23 obs)
## Primary splits:
## startprice.diff < 30.11884 to the right, improve=0.5773536, (0 missing)
##
## Node number 186: 129 observations, complexity param=0.004194631
## predicted class=N expected loss=0.4108527 P(node) =0.1331269
## class counts: 76 53
## probabilities: 0.589 0.411
## left son=372 (27 obs) right son=373 (102 obs)
## Primary splits:
## startprice.diff < -8.72155 to the left, improve=1.569438, (0 missing)
##
## Node number 187: 24 observations, complexity param=0.002237136
## predicted class=Y expected loss=0.4166667 P(node) =0.0247678
## class counts: 10 14
## probabilities: 0.417 0.583
## left son=374 (15 obs) right son=375 (9 obs)
## Primary splits:
## startprice.diff < -16.81039 to the left, improve=1.088889, (0 missing)
##
## Node number 358: 18 observations
## predicted class=N expected loss=0.2222222 P(node) =0.01857585
## class counts: 14 4
## probabilities: 0.778 0.222
##
## Node number 359: 23 observations, complexity param=0.0005592841
## predicted class=N expected loss=0.3913043 P(node) =0.02373581
## class counts: 14 9
## probabilities: 0.609 0.391
## left son=718 (16 obs) right son=719 (7 obs)
## Primary splits:
## startprice.diff < 28.47301 to the left, improve=0.6529503, (0 missing)
##
## Node number 372: 27 observations
## predicted class=N expected loss=0.2592593 P(node) =0.02786378
## class counts: 20 7
## probabilities: 0.741 0.259
##
## Node number 373: 102 observations, complexity param=0.004194631
## predicted class=N expected loss=0.4509804 P(node) =0.1052632
## class counts: 56 46
## probabilities: 0.549 0.451
## left son=746 (88 obs) right son=747 (14 obs)
## Primary splits:
## startprice.diff < -2.461955 to the right, improve=3.636427, (0 missing)
##
## Node number 374: 15 observations
## predicted class=N expected loss=0.4666667 P(node) =0.01547988
## class counts: 8 7
## probabilities: 0.533 0.467
##
## Node number 375: 9 observations
## predicted class=Y expected loss=0.2222222 P(node) =0.009287926
## class counts: 2 7
## probabilities: 0.222 0.778
##
## Node number 718: 16 observations
## predicted class=N expected loss=0.3125 P(node) =0.01651187
## class counts: 11 5
## probabilities: 0.688 0.312
##
## Node number 719: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.007223942
## class counts: 3 4
## probabilities: 0.429 0.571
##
## Node number 746: 88 observations, complexity param=0.004194631
## predicted class=N expected loss=0.3977273 P(node) =0.09081527
## class counts: 53 35
## probabilities: 0.602 0.398
## left son=1492 (78 obs) right son=1493 (10 obs)
## Primary splits:
## startprice.diff < 19.13936 to the left, improve=0.9231935, (0 missing)
##
## Node number 747: 14 observations
## predicted class=Y expected loss=0.2142857 P(node) =0.01444788
## class counts: 3 11
## probabilities: 0.214 0.786
##
## Node number 1492: 78 observations, complexity param=0.001118568
## predicted class=N expected loss=0.3717949 P(node) =0.08049536
## class counts: 49 29
## probabilities: 0.628 0.372
## left son=2984 (9 obs) right son=2985 (69 obs)
## Primary splits:
## startprice.diff < 16.92743 to the right, improve=1.382757, (0 missing)
##
## Node number 1493: 10 observations
## predicted class=Y expected loss=0.4 P(node) =0.01031992
## class counts: 4 6
## probabilities: 0.400 0.600
##
## Node number 2984: 9 observations
## predicted class=N expected loss=0.1111111 P(node) =0.009287926
## class counts: 8 1
## probabilities: 0.889 0.111
##
## Node number 2985: 69 observations, complexity param=0.001118568
## predicted class=N expected loss=0.4057971 P(node) =0.07120743
## class counts: 41 28
## probabilities: 0.594 0.406
## left son=5970 (54 obs) right son=5971 (15 obs)
## Primary splits:
## startprice.diff < 12.81441 to the left, improve=0.6235105, (0 missing)
##
## Node number 5970: 54 observations, complexity param=0.001118568
## predicted class=N expected loss=0.3703704 P(node) =0.05572755
## class counts: 34 20
## probabilities: 0.630 0.370
## left son=11940 (11 obs) right son=11941 (43 obs)
## Primary splits:
## startprice.diff < 9.7053 to the right, improve=0.9822254, (0 missing)
##
## Node number 5971: 15 observations
## predicted class=Y expected loss=0.4666667 P(node) =0.01547988
## class counts: 7 8
## probabilities: 0.467 0.533
##
## Node number 11940: 11 observations
## predicted class=N expected loss=0.1818182 P(node) =0.01135191
## class counts: 9 2
## probabilities: 0.818 0.182
##
## Node number 11941: 43 observations, complexity param=0.001118568
## predicted class=N expected loss=0.4186047 P(node) =0.04437564
## class counts: 25 18
## probabilities: 0.581 0.419
## left son=23882 (36 obs) right son=23883 (7 obs)
## Primary splits:
## startprice.diff < 7.505582 to the left, improve=0.39055, (0 missing)
##
## Node number 23882: 36 observations, complexity param=0.001118568
## predicted class=N expected loss=0.3888889 P(node) =0.0371517
## class counts: 22 14
## probabilities: 0.611 0.389
## left son=47764 (7 obs) right son=47765 (29 obs)
## Primary splits:
## startprice.diff < 4.593603 to the right, improve=1.051998, (0 missing)
##
## Node number 23883: 7 observations
## predicted class=Y expected loss=0.4285714 P(node) =0.007223942
## class counts: 3 4
## probabilities: 0.429 0.571
##
## Node number 47764: 7 observations
## predicted class=N expected loss=0.1428571 P(node) =0.007223942
## class counts: 6 1
## probabilities: 0.857 0.143
##
## Node number 47765: 29 observations, complexity param=0.001118568
## predicted class=N expected loss=0.4482759 P(node) =0.02992776
## class counts: 16 13
## probabilities: 0.552 0.448
## left son=95530 (19 obs) right son=95531 (10 obs)
## Primary splits:
## startprice.diff < 1.154237 to the left, improve=0.7027223, (0 missing)
##
## Node number 95530: 19 observations
## predicted class=N expected loss=0.3684211 P(node) =0.01960784
## class counts: 12 7
## probabilities: 0.632 0.368
##
## Node number 95531: 10 observations
## predicted class=Y expected loss=0.4 P(node) =0.01031992
## class counts: 4 6
## probabilities: 0.400 0.600
##
## n= 969
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 969 447 N (0.53869969 0.46130031)
## 2) biddable< 0.5 539 114 N (0.78849722 0.21150278)
## 4) startprice.diff>=40.80806 167 7 N (0.95808383 0.04191617) *
## 5) startprice.diff< 40.80806 372 107 N (0.71236559 0.28763441)
## 10) startprice.diff< -35.3304 128 13 N (0.89843750 0.10156250) *
## 11) startprice.diff>=-35.3304 244 94 N (0.61475410 0.38524590)
## 22) startprice.diff>=20.77843 65 18 N (0.72307692 0.27692308)
## 44) startprice.diff< 37.95043 58 14 N (0.75862069 0.24137931)
## 88) startprice.diff>=36.35647 9 0 N (1.00000000 0.00000000) *
## 89) startprice.diff< 36.35647 49 14 N (0.71428571 0.28571429)
## 178) startprice.diff< 24.4582 8 1 N (0.87500000 0.12500000) *
## 179) startprice.diff>=24.4582 41 13 N (0.68292683 0.31707317)
## 358) startprice.diff>=30.11884 18 4 N (0.77777778 0.22222222) *
## 359) startprice.diff< 30.11884 23 9 N (0.60869565 0.39130435)
## 718) startprice.diff< 28.47301 16 5 N (0.68750000 0.31250000) *
## 719) startprice.diff>=28.47301 7 3 Y (0.42857143 0.57142857) *
## 45) startprice.diff>=37.95043 7 3 Y (0.42857143 0.57142857) *
## 23) startprice.diff< 20.77843 179 76 N (0.57541899 0.42458101)
## 46) startprice.diff>=-31.00081 168 69 N (0.58928571 0.41071429)
## 92) startprice.diff< -21.8033 15 2 N (0.86666667 0.13333333) *
## 93) startprice.diff>=-21.8033 153 67 N (0.56209150 0.43790850)
## 186) startprice.diff>=-15.07226 129 53 N (0.58914729 0.41085271)
## 372) startprice.diff< -8.72155 27 7 N (0.74074074 0.25925926) *
## 373) startprice.diff>=-8.72155 102 46 N (0.54901961 0.45098039)
## 746) startprice.diff>=-2.461955 88 35 N (0.60227273 0.39772727)
## 1492) startprice.diff< 19.13936 78 29 N (0.62820513 0.37179487)
## 2984) startprice.diff>=16.92743 9 1 N (0.88888889 0.11111111) *
## 2985) startprice.diff< 16.92743 69 28 N (0.59420290 0.40579710)
## 5970) startprice.diff< 12.81441 54 20 N (0.62962963 0.37037037)
## 11940) startprice.diff>=9.7053 11 2 N (0.81818182 0.18181818) *
## 11941) startprice.diff< 9.7053 43 18 N (0.58139535 0.41860465)
## 23882) startprice.diff< 7.505582 36 14 N (0.61111111 0.38888889)
## 47764) startprice.diff>=4.593603 7 1 N (0.85714286 0.14285714) *
## 47765) startprice.diff< 4.593603 29 13 N (0.55172414 0.44827586)
## 95530) startprice.diff< 1.154237 19 7 N (0.63157895 0.36842105) *
## 95531) startprice.diff>=1.154237 10 4 Y (0.40000000 0.60000000) *
## 23883) startprice.diff>=7.505582 7 3 Y (0.42857143 0.57142857) *
## 5971) startprice.diff>=12.81441 15 7 Y (0.46666667 0.53333333) *
## 1493) startprice.diff>=19.13936 10 4 Y (0.40000000 0.60000000) *
## 747) startprice.diff< -2.461955 14 3 Y (0.21428571 0.78571429) *
## 187) startprice.diff< -15.07226 24 10 Y (0.41666667 0.58333333)
## 374) startprice.diff< -16.81039 15 7 N (0.53333333 0.46666667) *
## 375) startprice.diff>=-16.81039 9 2 Y (0.22222222 0.77777778) *
## 47) startprice.diff< -31.00081 11 4 Y (0.36363636 0.63636364) *
## 3) biddable>=0.5 430 97 Y (0.22558140 0.77441860)
## 6) startprice.diff>=63.51092 80 10 N (0.87500000 0.12500000) *
## 7) startprice.diff< 63.51092 350 27 Y (0.07714286 0.92285714)
## 14) startprice.diff>=44.72834 24 8 Y (0.33333333 0.66666667)
## 28) startprice.diff< 47.96221 7 2 N (0.71428571 0.28571429) *
## 29) startprice.diff>=47.96221 17 3 Y (0.17647059 0.82352941) *
## 15) startprice.diff< 44.72834 326 19 Y (0.05828221 0.94171779) *
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7096774
## 3 0.2 0.8350305
## 4 0.3 0.8537634
## 5 0.4 0.8603352
## 6 0.5 0.8590909
## 7 0.6 0.8398058
## 8 0.7 0.8339483
## 9 0.8 0.8126582
## 10 0.9 0.7943079
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 459
## 2 Y 62
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1 63
## 2 385
## Prediction
## Reference N Y
## N 459 63
## Y 62 385
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.710010e-01 7.404889e-01 8.482486e-01 8.914697e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 5.246976e-109 1.000000e+00
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.7082601
## 3 0.2 0.7955556
## 4 0.3 0.8130841
## 5 0.4 0.8105516
## 6 0.5 0.8117359
## 7 0.6 0.8098318
## 8 0.7 0.8130719
## 9 0.8 0.8119891
## 10 0.9 0.8099861
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.3000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.N
## 1 N 382
## 2 Y 65
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.Y
## 1 95
## 2 348
## Prediction
## Reference N Y
## N 382 95
## Y 65 348
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.202247e-01 6.403332e-01 7.933882e-01 8.449213e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 4.501677e-71 2.186809e-02
## model_id model_method feats
## 1 Max.cor.Y.cv.0.cp.0.rpart rpart biddable, startprice.diff
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 0 0.471 0.008
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9238966 0.4 0.8603352 0.871001
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8482486 0.8914697 0.7404889 0.8997924
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.3 0.8130841 0.8202247
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.7933882 0.8449213 0.6403332
# Max.cor.Y with glb_n_cv_folds cross-validation folds and no explicit tuning
# grid. Run here only for regression / binomial problems; for multinomials
# this model will be run next by default.
if (glb_is_regression || glb_is_binomial) {
    ret_lst <- myfit_mdl(
        model_id = "Max.cor.Y",
        model_method = "rpart",
        model_type = glb_model_type,
        indep_vars_vctr = max_cor_y_x_vars,
        rsp_var = glb_rsp_var,
        rsp_var_out = glb_rsp_var_out,
        fit_df = glb_fitobs_df,
        OOB_df = glb_OOBobs_df,
        n_cv_folds = glb_n_cv_folds,
        tune_models_df = NULL
    )
}
## [1] "fitting model: Max.cor.Y.rpart"
## [1] " indep_vars: biddable, startprice.diff"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00419 on full training set
## Warning in myfit_mdl(model_id = "Max.cor.Y", model_method = "rpart",
## model_type = glb_model_type, : model's bestTune found at an extreme of
## tuneGrid for parameter: cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 969
##
## CP nsplit rel error
## 1 0.527964206 0 1.0000000
## 2 0.134228188 1 0.4720358
## 3 0.004194631 2 0.3378076
##
## Variable importance
## biddable startprice.diff
## 64 36
##
## Node number 1: 969 observations, complexity param=0.5279642
## predicted class=N expected loss=0.4613003 P(node) =1
## class counts: 522 447
## probabilities: 0.539 0.461
## left son=2 (539 obs) right son=3 (430 obs)
## Primary splits:
## biddable < 0.5 to the left, improve=151.58290, (0 missing)
## startprice.diff < 62.89456 to the right, improve= 82.96307, (0 missing)
## Surrogate splits:
## startprice.diff < 250.1071 to the left, agree=0.562, adj=0.014, (0 split)
##
## Node number 2: 539 observations
## predicted class=N expected loss=0.2115028 P(node) =0.5562436
## class counts: 425 114
## probabilities: 0.788 0.212
##
## Node number 3: 430 observations, complexity param=0.1342282
## predicted class=Y expected loss=0.2255814 P(node) =0.4437564
## class counts: 97 333
## probabilities: 0.226 0.774
## left son=6 (80 obs) right son=7 (350 obs)
## Primary splits:
## startprice.diff < 63.51092 to the right, improve=82.90292, (0 missing)
##
## Node number 6: 80 observations
## predicted class=N expected loss=0.125 P(node) =0.08255934
## class counts: 70 10
## probabilities: 0.875 0.125
##
## Node number 7: 350 observations
## predicted class=Y expected loss=0.07714286 P(node) =0.3611971
## class counts: 27 323
## probabilities: 0.077 0.923
##
## n= 969
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 969 447 N (0.53869969 0.46130031)
## 2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
## 3) biddable>=0.5 430 97 Y (0.22558140 0.77441860)
## 6) startprice.diff>=63.51092 80 10 N (0.87500000 0.12500000) *
## 7) startprice.diff< 63.51092 350 27 Y (0.07714286 0.92285714) *
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6313559
## 3 0.2 0.6541916
## 4 0.3 0.8105395
## 5 0.4 0.8105395
## 6 0.5 0.8105395
## 7 0.6 0.8105395
## 8 0.7 0.8105395
## 9 0.8 0.8105395
## 10 0.9 0.8105395
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Max.cor.Y.rpart.N
## 1 N 495
## 2 Y 124
## sold.fctr.predict.Max.cor.Y.rpart.Y
## 1 27
## 2 323
## Prediction
## Reference N Y
## N 495 27
## Y 124 323
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.441692e-01 6.814949e-01 8.197763e-01 8.664485e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.762753e-90 5.612287e-15
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6339217
## 3 0.2 0.6633907
## 4 0.3 0.8102981
## 5 0.4 0.8102981
## 6 0.5 0.8102981
## 7 0.6 0.8102981
## 8 0.7 0.8102981
## 9 0.8 0.8102981
## 10 0.9 0.8102981
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Max.cor.Y.rpart.N
## 1 N 451
## 2 Y 114
## sold.fctr.predict.Max.cor.Y.rpart.Y
## 1 26
## 2 299
## Prediction
## Reference N Y
## N 451 26
## Y 114 299
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.426966e-01 6.791719e-01 8.170871e-01 8.660125e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.090657e-83 1.940362e-13
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.rpart rpart biddable, startprice.diff 3
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.959 0.012 0.8434283
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9 0.8105395 0.8276574
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8197763 0.8664485 0.6497643 0.8469855
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.8102981 0.8426966
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8170871 0.8660125 0.6791719
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01087271 0.02515063
# Max.cor.Y: fit the features in max_cor_y_x_vars with the default method
# for the problem type (lm for regression, glm for binomial classification,
# rpart otherwise).  This model is the reference that
# Interactions.High.cor.Y and/or Max.cor.Y.TmSrs are compared against.
ret_lst <- myfit_mdl(
    model_id        = "Max.cor.Y",
    model_method    = if (glb_is_regression) "lm" else
                      if (glb_is_binomial) "glm" else "rpart",
    model_type      = glb_model_type,
    indep_vars_vctr = max_cor_y_x_vars,
    rsp_var         = glb_rsp_var,
    rsp_var_out     = glb_rsp_var_out,
    fit_df          = glb_fitobs_df,
    OOB_df          = glb_OOBobs_df,
    n_cv_folds      = glb_n_cv_folds,
    tune_models_df  = NULL)
## [1] "fitting model: Max.cor.Y.glm"
## [1] " indep_vars: biddable, startprice.diff"
## Aggregating results
## Fitting final model on full training set
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3288 -0.7074 -0.2424 0.5858 2.7650
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.318196 0.113712 -11.592 <2e-16 ***
## biddable 3.077594 0.185065 16.630 <2e-16 ***
## startprice.diff -0.011328 0.001175 -9.639 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 877.46 on 966 degrees of freedom
## AIC: 883.46
##
## Number of Fisher Scoring iterations: 5
##
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.63135593
## 2 0.1 0.68725869
## 3 0.2 0.71942446
## 4 0.3 0.71295337
## 5 0.4 0.74831461
## 6 0.5 0.77083333
## 7 0.6 0.79616307
## 8 0.7 0.80593325
## 9 0.8 0.77165354
## 10 0.9 0.08102345
## 11 1.0 0.00000000
## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Max.cor.Y.glm.N
## 1 N 486
## 2 Y 121
## sold.fctr.predict.Max.cor.Y.glm.Y
## 1 36
## 2 326
## Prediction
## Reference N Y
## N 486 36
## Y 121 326
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.379773e-01 6.694866e-01 8.132413e-01 8.606386e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.523362e-86 2.028877e-11
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.63392172
## 2 0.1 0.68661679
## 3 0.2 0.72303207
## 4 0.3 0.72108844
## 5 0.4 0.74673008
## 6 0.5 0.77057357
## 7 0.6 0.79021879
## 8 0.7 0.80478088
## 9 0.8 0.79096045
## 10 0.9 0.06960557
## 11 1.0 0.00000000
## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Max.cor.Y.glm.N
## 1 N 440
## 2 Y 110
## sold.fctr.predict.Max.cor.Y.glm.Y
## 1 37
## 2 303
## Prediction
## Reference N Y
## N 440 37
## Y 110 303
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.348315e-01 6.639612e-01 8.087745e-01 8.586487e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 4.091305e-79 2.877120e-09
## model_id model_method feats max.nTuningRuns
## 1 Max.cor.Y.glm glm biddable, startprice.diff 1
## min.elapsedtime.everything min.elapsedtime.final max.auc.fit
## 1 0.955 0.013 0.8591461
## opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.7 0.8059333 0.7987616
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8132413 0.8606386 0.5929577 0.8659702
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.7 0.8047809 0.8348315
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.8087745 0.8586487 0.6639612 883.4623
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03879246 0.07822035
# Max.cor.Y.TmSrs.poly: the Max.cor.Y features plus every column of
# glb_allobs_df whose name contains "<date_var>.day.minutes.poly." for any
# of the configured date variables.  Skipped when glb_date_vars is NULL or
# no such column exists.
#
# One regex alternative is built per date variable and the alternatives are
# joined with "|".  Handing grep()/grepl() a multi-element pattern would use
# only its first element (with a warning), silently ignoring the polynomial
# columns of every other date variable.  With a single date variable the
# pattern is unchanged.
if (!is.null(glb_date_vars) &&
    (length(tmsrs_poly_feats <- grep(
        paste(paste0(glb_date_vars, "\\.day\\.minutes\\.poly\\."),
              collapse="|"),
        names(glb_allobs_df), value=TRUE)) > 0)) {
    # Disabled variant that used the raw ".day.minutes" columns:
#     ret_lst <- myfit_mdl(model_id="Max.cor.Y.TmSrs.poly1",
#                             model_method=ifelse(glb_is_regression, "lm",
#                                         ifelse(glb_is_binomial, "glm", "rpart")),
#                             model_type=glb_model_type,
#                             indep_vars_vctr=c(max_cor_y_x_vars, paste0(glb_date_vars, ".day.minutes")),
#                             rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
#                             fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
#                             n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
#
    ret_lst <- myfit_mdl(model_id="Max.cor.Y.TmSrs.poly",
                         model_method=ifelse(glb_is_regression, "lm",
                                      ifelse(glb_is_binomial, "glm", "rpart")),
                         model_type=glb_model_type,
                         # columns matched by the condition above
                         indep_vars_vctr=c(max_cor_y_x_vars, tmsrs_poly_feats),
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
}
# Interactions.High.cor.Y
#   Runs only when glb_feats_df$cor.high.X holds at least one non-NA value.
#   For lm/glm the model uses the Max.cor.Y features plus an interaction
#   ("a:b") of the first Max.cor.Y feature with each distinct cor.high.X
#   value; for other methods those features are added as plain main effects.
int_feats <- setdiff(unique(glb_feats_df$cor.high.X), NA)
if (length(int_feats) > 0) {
    # lm & glm handle interaction terms; rpart & rf do not
    if (glb_is_regression || glb_is_binomial) {
        indep_vars_vctr <-
            c(max_cor_y_x_vars, paste(max_cor_y_x_vars[1], int_feats, sep=":"))
    } else { indep_vars_vctr <- union(max_cor_y_x_vars, int_feats) }

    # Every argument is passed by name, as in the other myfit_mdl() calls,
    # so the call does not depend on the order of myfit_mdl's formals.
    ret_lst <- myfit_mdl(model_id="Interact.High.cor.Y",
                         model_method=ifelse(glb_is_regression, "lm",
                                      ifelse(glb_is_binomial, "glm", "rpart")),
                         model_type=glb_model_type,
                         indep_vars_vctr=indep_vars_vctr,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
}
## [1] "fitting model: Interact.High.cor.Y.glm"
## [1] " indep_vars: biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log"
## Aggregating results
## Fitting final model on full training set
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.3460 -0.7080 -0.2415 0.5570 2.7490
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.315185 0.113499 -11.588 <2e-16
## biddable 3.431627 1.432235 2.396 0.0166
## startprice.diff -0.011135 0.001186 -9.389 <2e-16
## `biddable:D.terms.n.post.stop` -0.472748 0.390367 -1.211 0.2259
## `biddable:D.TfIdf.sum.post.stem` 0.272868 0.214844 1.270 0.2041
## `biddable:D.ratio.nstopwrds.nwrds` -0.196093 1.423291 -0.138 0.8904
## `biddable:D.npnct06.log` -0.443007 0.765464 -0.579 0.5628
## `biddable:D.nchrs.log` -2.042901 1.323123 -1.544 0.1226
## `biddable:D.terms.n.post.stop.log` 13.051918 12.683531 1.029 0.3035
## `biddable:cellular.fctr1` 0.121261 0.324337 0.374 0.7085
## `biddable:cellular.fctrUnknown` -0.935810 0.377576 -2.478 0.0132
## `biddable:D.nwrds.unq.log` -8.093449 11.778183 -0.687 0.4920
##
## (Intercept) ***
## biddable *
## startprice.diff ***
## `biddable:D.terms.n.post.stop`
## `biddable:D.TfIdf.sum.post.stem`
## `biddable:D.ratio.nstopwrds.nwrds`
## `biddable:D.npnct06.log`
## `biddable:D.nchrs.log`
## `biddable:D.terms.n.post.stop.log`
## `biddable:cellular.fctr1`
## `biddable:cellular.fctrUnknown` *
## `biddable:D.nwrds.unq.log`
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 863.84 on 957 degrees of freedom
## AIC: 887.84
##
## Number of Fisher Scoring iterations: 5
##
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6867284
## 3 0.2 0.7212230
## 4 0.3 0.7160752
## 5 0.4 0.7483146
## 6 0.5 0.7720930
## 7 0.6 0.7908102
## 8 0.7 0.7904642
## 9 0.8 0.7425474
## 10 0.9 0.2758621
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 469
## 2 Y 120
## sold.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 53
## 2 327
## Prediction
## Reference N Y
## N 469 53
## Y 120 327
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.214654e-01 6.368687e-01 7.958716e-01 8.450880e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.291609e-76 5.224287e-07
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6854772
## 3 0.2 0.7235694
## 4 0.3 0.7186441
## 5 0.4 0.7467301
## 6 0.5 0.7686567
## 7 0.6 0.7865459
## 8 0.7 0.7838926
## 9 0.8 0.7349927
## 10 0.9 0.2541667
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Interact.High.cor.Y.glm.N
## 1 N 421
## 2 Y 109
## sold.fctr.predict.Interact.High.cor.Y.glm.Y
## 1 56
## 2 304
## Prediction
## Reference N Y
## N 421 56
## Y 109 304
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.146067e-01 6.240496e-01 7.874870e-01 8.396247e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 3.944312e-68 5.161425e-05
## model_id model_method
## 1 Interact.High.cor.Y.glm glm
## feats
## 1 biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 0.991 0.015
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.861739 0.6 0.7908102 0.7997936
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7958716 0.845088 0.594392 0.8576352
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.6 0.7865459 0.8146067
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.787487 0.8396247 0.6240496 887.8417
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03719468 0.07559072
# Low.cor.X
#   Candidate features are the ids of glb_feats_df rows that have no
#   cor.high.X entry, are not flagged myNearZV, and have exclude.as.feat != 1.
#   which() drops rows whose condition evaluates to NA.
#
#   Disabled alternative for binomial classification, kept for reference:
#     indep_vars_vctr <- subset(glb_feats_df, is.na(cor.high.X) &
#                                             is.ConditionalX.y &
#                                             (exclude.as.feat != 1))[, "id"]
indep_vars_vctr <- glb_feats_df[
    which(with(glb_feats_df,
               is.na(cor.high.X) & !myNearZV & (exclude.as.feat != 1))),
    "id"]
# Rewrite a feature-name vector so that each feature with a declared
# interaction partner is replaced by the interaction term "<partner>:<feature>".
#
# Args:
#   vars_vctr: character vector of feature ids.
# Returns:
#   vars_vctr with every id that has a non-NA glb_feats_df$interaction.feat
#   removed and the corresponding "<interaction.feat>:<id>" term appended.
#   The input is returned untouched when none of its ids qualifies.
# Reads the global glb_feats_df (columns id and interaction.feat).
myadjust_interaction_feats <- function(vars_vctr) {
    interact_ids <- subset(glb_feats_df, !is.na(interaction.feat))$id
    for (this_id in interact_ids) {
        # only features actually present in the vector are rewritten
        if (!(this_id %in% vars_vctr)) next

        partner <- glb_feats_df[glb_feats_df$id == this_id, "interaction.feat"]
        interact_term <- paste0(partner, ":", this_id)
        vars_vctr <- union(setdiff(vars_vctr, this_id), interact_term)
    }
    return(vars_vctr)
}
# Swap any feature that declares an interaction partner for its
# "<partner>:<feature>" term, then fit the Low.cor.X model with the default
# method for the problem type (lm / glm / rpart).
indep_vars_vctr <- myadjust_interaction_feats(indep_vars_vctr)

# rsp_var / rsp_var_out are passed by name, as in the other myfit_mdl()
# calls, instead of positionally after named arguments; the call no longer
# depends on the order of myfit_mdl's formals.
ret_lst <- myfit_mdl(model_id="Low.cor.X",
                     model_method=ifelse(glb_is_regression, "lm",
                                  ifelse(glb_is_binomial, "glm", "rpart")),
                     model_type=glb_model_type,
                     indep_vars_vctr=indep_vars_vctr,
                     rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                     fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                     n_cv_folds=glb_n_cv_folds, tune_models_df=NULL)
## [1] "fitting model: Low.cor.X.glm"
## [1] " indep_vars: biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
## 354, 619
## Warning: not plotting observations with leverage one:
## 354, 619
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.7920 -0.6214 -0.1062 0.4960 3.2701
##
## Coefficients: (53 not defined because of singularities)
## Estimate Std. Error
## (Intercept) -7.334e+00 6.651e+00
## biddable 3.077e+00 2.274e-01
## D.npnct15.log 1.677e+00 9.240e-01
## D.npnct03.log 3.299e-01 1.535e+00
## D.terms.n.stem.stop.Ratio 4.797e+00 6.085e+00
## D.ratio.sum.TfIdf.nwrds -4.877e-01 4.544e-01
## D.npnct01.log 2.717e-01 7.962e-01
## .rnorm 6.336e-03 9.519e-02
## D.TfIdf.sum.stem.stop.Ratio 1.235e+00 4.224e+00
## storage.fctr16 -1.215e-01 5.683e-01
## storage.fctr32 -2.054e-01 5.949e-01
## storage.fctr64 3.864e-01 5.999e-01
## storage.fctrUnknown 2.769e-01 7.260e-01
## D.npnct11.log 2.648e-01 3.885e-01
## D.npnct10.log -1.511e+00 1.940e+00
## `prdl.my.descr.fctrUnknown#1` 7.109e-01 1.118e+00
## `prdl.my.descr.fctriPad 1#0` 7.377e-01 5.739e-01
## `prdl.my.descr.fctriPad 1#1` 2.353e+00 1.186e+00
## `prdl.my.descr.fctriPad 2#0` 1.744e+00 6.851e-01
## `prdl.my.descr.fctriPad 2#1` 1.857e+00 1.172e+00
## `prdl.my.descr.fctriPad 3+#0` 1.173e+00 5.493e-01
## `prdl.my.descr.fctriPad 3+#1` 8.844e-01 1.185e+00
## `prdl.my.descr.fctriPadAir#0` 3.489e-01 5.258e-01
## `prdl.my.descr.fctriPadAir#1` 1.775e+00 1.134e+00
## `prdl.my.descr.fctriPadmini 2+#0` 2.045e-01 5.371e-01
## `prdl.my.descr.fctriPadmini 2+#1` 1.785e+00 1.630e+00
## `prdl.my.descr.fctriPadmini#0` 5.642e-01 5.201e-01
## `prdl.my.descr.fctriPadmini#1` 1.643e+00 1.328e+00
## color.fctrGold 3.371e-02 5.518e-01
## `color.fctrSpace Gray` -1.404e-01 3.796e-01
## color.fctrUnknown -2.897e-01 2.730e-01
## color.fctrWhite -2.466e-01 3.007e-01
## D.npnct08.log 6.281e-01 7.927e-01
## D.npnct06.log -1.974e+00 9.406e-01
## D.npnct28.log -2.853e+00 3.334e+03
## D.npnct12.log -9.026e-01 8.708e-01
## D.npnct09.log -9.265e+00 2.854e+03
## D.ndgts.log 6.392e-01 4.211e-01
## cellular.fctr1 -3.674e-02 2.314e-01
## cellular.fctrUnknown -6.176e-01 4.605e-01
## D.npnct14.log -1.013e+00 9.205e-01
## D.terms.n.post.stop -1.688e-01 8.496e-02
## D.npnct05.log -3.690e+00 1.766e+00
## `condition.fctrFor parts or not working` 6.088e-01 3.918e-01
## `condition.fctrManufacturer refurbished` 8.394e-01 5.850e-01
## condition.fctrNew -2.849e-01 3.099e-01
## `condition.fctrNew other (see details)` 8.380e-01 5.025e-01
## `condition.fctrSeller refurbished` -6.690e-01 4.741e-01
## idseq.my -1.261e-04 2.150e-04
## startprice.diff -1.354e-02 1.547e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 1.645e+00 1.004e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` 6.192e-02 1.058e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -8.743e-02 9.407e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 1.082e+00 8.614e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 7.218e-02 7.851e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 6.090e-01 1.917e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 6.295e-02 1.408e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 1.142e+00 2.018e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 7.483e-02 1.232e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -5.813e-01 1.279e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 4.610e-01 1.360e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -6.393e-01 1.223e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 7.449e-01 2.220e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 2.777e-01 1.264e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.803e+01 2.669e+03
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 7.096e-02 1.155e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 6.105e-01 1.113e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -1.451e+01 8.458e+02
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -6.011e-01 1.130e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.433e+01 3.956e+03
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 1.722e+01 1.542e+03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 1.681e+01 1.401e+03
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 6.595e-02 1.161e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.866e+00 9.585e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 9.570e-01 2.942e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.658e+00 1.544e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.513e+00 1.413e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 2.573e-01 1.439e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 7.561e-01 1.431e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -1.582e+01 2.280e+03
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -3.387e-01 2.179e+00
## z value Pr(>|z|)
## (Intercept) -1.103 0.27013
## biddable 13.530 < 2e-16 ***
## D.npnct15.log 1.815 0.06953 .
## D.npnct03.log 0.215 0.82984
## D.terms.n.stem.stop.Ratio 0.788 0.43048
## D.ratio.sum.TfIdf.nwrds -1.073 0.28314
## D.npnct01.log 0.341 0.73288
## .rnorm 0.067 0.94693
## D.TfIdf.sum.stem.stop.Ratio 0.292 0.76997
## storage.fctr16 -0.214 0.83066
## storage.fctr32 -0.345 0.72987
## storage.fctr64 0.644 0.51952
## storage.fctrUnknown 0.381 0.70288
## D.npnct11.log 0.681 0.49558
## D.npnct10.log -0.779 0.43605
## `prdl.my.descr.fctrUnknown#1` 0.636 0.52476
## `prdl.my.descr.fctriPad 1#0` 1.285 0.19869
## `prdl.my.descr.fctriPad 1#1` 1.985 0.04719 *
## `prdl.my.descr.fctriPad 2#0` 2.545 0.01093 *
## `prdl.my.descr.fctriPad 2#1` 1.584 0.11317
## `prdl.my.descr.fctriPad 3+#0` 2.135 0.03280 *
## `prdl.my.descr.fctriPad 3+#1` 0.746 0.45540
## `prdl.my.descr.fctriPadAir#0` 0.664 0.50697
## `prdl.my.descr.fctriPadAir#1` 1.566 0.11735
## `prdl.my.descr.fctriPadmini 2+#0` 0.381 0.70337
## `prdl.my.descr.fctriPadmini 2+#1` 1.095 0.27357
## `prdl.my.descr.fctriPadmini#0` 1.085 0.27801
## `prdl.my.descr.fctriPadmini#1` 1.237 0.21601
## color.fctrGold 0.061 0.95128
## `color.fctrSpace Gray` -0.370 0.71149
## color.fctrUnknown -1.062 0.28845
## color.fctrWhite -0.820 0.41217
## D.npnct08.log 0.792 0.42810
## D.npnct06.log -2.099 0.03581 *
## D.npnct28.log -0.001 0.99932
## D.npnct12.log -1.036 0.29997
## D.npnct09.log -0.003 0.99741
## D.ndgts.log 1.518 0.12907
## cellular.fctr1 -0.159 0.87385
## cellular.fctrUnknown -1.341 0.17992
## D.npnct14.log -1.101 0.27106
## D.terms.n.post.stop -1.987 0.04690 *
## D.npnct05.log -2.089 0.03667 *
## `condition.fctrFor parts or not working` 1.554 0.12020
## `condition.fctrManufacturer refurbished` 1.435 0.15132
## condition.fctrNew -0.919 0.35786
## `condition.fctrNew other (see details)` 1.668 0.09536 .
## `condition.fctrSeller refurbished` -1.411 0.15819
## idseq.my -0.586 0.55761
## startprice.diff -8.754 < 2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 1.639 0.10132
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` 0.058 0.95335
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -0.093 0.92595
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 1.256 0.20896
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 0.092 0.92675
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 0.318 0.75071
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 0.045 0.96434
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 0.566 0.57131
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 0.061 0.95156
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -0.454 0.64958
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 0.339 0.73473
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -0.523 0.60112
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 0.335 0.73727
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 0.220 0.82603
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 0.007 0.99461
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 0.061 0.95099
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 0.549 0.58330
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -0.017 0.98631
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -0.532 0.59484
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -0.004 0.99711
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 0.011 0.99109
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 0.012 0.99043
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 0.057 0.95471
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.990 0.00279 **
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 0.325 0.74495
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.073 0.28307
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.071 0.28408
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 0.179 0.85809
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 0.529 0.59712
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -0.007 0.99446
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -0.155 0.87647
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 752.13 on 888 degrees of freedom
## AIC: 914.13
##
## Number of Fisher Scoring iterations: 16
##
## [1] " calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7225914
## 3 0.2 0.7670980
## 4 0.3 0.8012295
## 5 0.4 0.8096810
## 6 0.5 0.8101852
## 7 0.6 0.8043478
## 8 0.7 0.7863464
## 9 0.8 0.7419355
## 10 0.9 0.5669291
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Low.cor.X.glm.N
## 1 N 455
## 2 Y 97
## sold.fctr.predict.Low.cor.X.glm.Y
## 1 67
## 2 350
## Prediction
## Reference N Y
## N 455 67
## Y 97 350
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.307534e-01 6.578169e-01 8.056321e-01 8.538452e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 4.228202e-82 2.354218e-02
## [1] " calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6990291
## 3 0.2 0.7308838
## 4 0.3 0.7407407
## 5 0.4 0.7482517
## 6 0.5 0.7600487
## 7 0.6 0.7582697
## 8 0.7 0.7456258
## 9 0.8 0.6577778
## 10 0.9 0.4745167
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Low.cor.X.glm.N
## 1 N 381
## 2 Y 101
## sold.fctr.predict.Low.cor.X.glm.Y
## 1 96
## 2 312
## Prediction
## Reference N Y
## N 381 96
## Y 101 312
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.786517e-01 5.546405e-01 7.499158e-01 8.055293e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 3.954556e-51 7.756532e-01
## model_id model_method
## 1 Low.cor.X.glm glm
## feats
## 1 biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 1.741 0.39
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9028388 0.5 0.8101852 0.7688338
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8056321 0.8538452 0.5344407 0.8382546
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.7600487 0.7786517
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7499158 0.8055293 0.5546405 914.127
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01966208 0.03929384
# Remove the ret_lst binding left behind by the preceding myfit_mdl() calls.
rm(ret_lst)
# Log the next "fit.models" step in glb_chunks_df. Per the table printed
# below, the previous "fit.models" row gets its end/elapsed filled in and a
# new row is opened with the same step_major and step_minor incremented.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 10 fit.models 7 0 67.216 92.532 25.317
## 11 fit.models 7 1 92.533 NA NA
# Start a fresh chunk log for the fit.models_1 stage; passing NULL creates a
# new one-row table (see the printed output below) beginning at
# "fit.models_1_bgn".
fit.models_1_chunk_df <- myadd_chunk(NULL, "fit.models_1_bgn")
## label step_major step_minor bgn end elapsed
## 1 fit.models_1_bgn 1 0 96.37 NA NA
# Options:
# 1. rpart & rf manual tuning
# 2. rf without pca (default: with pca)
#stop(here"); sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df
#glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df
# All X: every feature that is neither flagged myNearZV nor user-excluded (exclude.as.feat == 1)
# Fit every method in glb_models_method_vctr on two candidate feature sets:
#   "All.X"          - every feature in glb_feats_df that is neither flagged
#                      myNearZV nor user-excluded (exclude.as.feat == 1)
#   "All.Interact.X" - the same set, with selected features replaced by
#                      interaction terms (see below)
# Each fit is delegated to myfit_mdl(); timing rows are appended to
# fit.models_1_chunk_df via myadd_chunk().
for (model_id_pfx in c("All.X", "All.Interact.X")) {
    #model_id_pfx <- "All.X"
    indep_vars_vctr <- subset(glb_feats_df, !myNearZV &
                                  (exclude.as.feat != 1))[, "id"]
    if (model_id_pfx == "All.Interact.X") {
        # !_sp
        # Features that are replaced by "<category>*<feature>" interaction terms.
        interact_vars_vctr <- c(
            "idseq.my", "D.ratio.sum.TfIdf.nwrds", "D.TfIdf.sum.stem.stop.Ratio",
            "D.npnct15.log", "D.npnct03.log", "D.nwrds.log", "D.nchrs.log")
        # The separator appends ".fctr" to glb_category_var when the name does
        # not already contain it, so the term always references the factor column.
        indep_vars_vctr <- union(setdiff(indep_vars_vctr, interact_vars_vctr),
            paste(glb_category_var, interact_vars_vctr,
                  sep=ifelse(grepl("\\.fctr", glb_category_var), "*", ".fctr*")))
        # Swap the four standalone features for two explicit pairwise interactions.
        indep_vars_vctr <- union(setdiff(indep_vars_vctr,
            c("startprice.diff", "biddable", "cellular.fctr", "carrier.fctr")),
            c("startprice.diff*biddable", "cellular.fctr*carrier.fctr"))
        ###
        # _sp only
        # interact_vars_vctr <- c(
        # "D.nchrs.log", "D.TfIdf.sum.stem.stop.Ratio",
        # "D.npnct16.log", "D.npnct01.log", "D.nstopwrds.log", "D.npnct08.log",
        # "D.terms.n.post.stop", "D.terms.n.post.stem",
        # "biddable", "condition.fctr",
        # # "cellular.fctr", "carrier.fctr",
        # "color.fctr", "storage.fctr", "idseq.my")
        # indep_vars_vctr <- union(setdiff(indep_vars_vctr, interact_vars_vctr),
        # paste(glb_category_var, interact_vars_vctr,
        # sep=ifelse(grepl("\\.fctr", glb_category_var), "*", ".fctr*")))
        # indep_vars_vctr <- union(setdiff(indep_vars_vctr,
        # c("cellular.fctr", "carrier.fctr")),
        # c("cellular.fctr*carrier.fctr"))
        ###
    }
    indep_vars_vctr <- myadjust_interaction_feats(indep_vars_vctr)
    #stop("here")
    for (method in glb_models_method_vctr) {
        fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df,
            paste0("fit.models_1_", method), major.inc=TRUE)
        # Build the per-method feature set in a loop-local variable. Overwriting
        # indep_vars_vctr here would silently drop ".rnorm" for every method
        # iterated after rpart/rf while their model_id still lacks ".no.rnorm".
        if (method %in% c("rpart", "rf")) {
            # rpart: .rnorm spoils the tree (original note: "fubar's the tree")
            # rf: skip the scenario w/ .rnorm for speed
            mdl_indep_vars_vctr <- setdiff(indep_vars_vctr, c(".rnorm"))
            model_id <- paste0(model_id_pfx, ".no.rnorm")
        } else {
            mdl_indep_vars_vctr <- indep_vars_vctr
            model_id <- model_id_pfx
        }
        ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
            indep_vars_vctr=mdl_indep_vars_vctr,
            model_type=glb_model_type,
            rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
            fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
            n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
        # If All.X.glm is less accurate than Low.Cor.X.glm
        # check NA coefficients & filter appropriate terms in indep_vars_vctr
        # NOTE(review): several column names in the disabled diagnostics below
        # (Popular, Headline, Abstract, S.*/A.*/H.* features) do not appear in
        # this run's feature list - they look carried over from another
        # project; adapt before re-enabling.
        # if (method == "glm") {
        # orig_glm <- glb_models_lst[[paste0(model_id, ".", method)]]$finalModel
        # orig_glm <- glb_models_lst[["All.X.glm"]]$finalModel; print(summary(orig_glm))
        # vif_orig_glm <- vif(orig_glm); print(vif_orig_glm)
        # print(vif_orig_glm[!is.na(vif_orig_glm) & (vif_orig_glm == Inf)])
        # print(which.max(vif_orig_glm))
        # print(sort(vif_orig_glm[vif_orig_glm >= 1.0e+03], decreasing=TRUE))
        # glb_fitobs_df[c(1143, 3637, 3953, 4105), c("UniqueID", "Popular", "H.P.quandary", "Headline")]
        # glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.nchrs.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.nchrs.log", glb_feats_df$id, value=TRUE), ]
        # glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.npnct14.log", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.npnct14.log", glb_feats_df$id, value=TRUE), ]
        # glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.T.scen", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.T.scen", glb_feats_df$id, value=TRUE), ]
        # glb_feats_df[glb_feats_df$id %in% grep("[HSA]\\.P.first", glb_feats_df$id, value=TRUE) | glb_feats_df$cor.high.X %in% grep("[HSA]\\.P.first", glb_feats_df$id, value=TRUE), ]
        # all.equal(glb_allobs_df$S.nuppr.log, glb_allobs_df$A.nuppr.log)
        # all.equal(glb_allobs_df$S.npnct19.log, glb_allobs_df$A.npnct19.log)
        # all.equal(glb_allobs_df$S.P.year.colon, glb_allobs_df$A.P.year.colon)
        # all.equal(glb_allobs_df$S.T.share, glb_allobs_df$A.T.share)
        # all.equal(glb_allobs_df$H.T.clip, glb_allobs_df$H.P.daily.clip.report)
        # cor(glb_allobs_df$S.T.herald, glb_allobs_df$S.T.tribun)
        # mydsp_obs(Abstract.contains="[Dd]iar", cols=("Abstract"), all=TRUE)
        # mydsp_obs(Abstract.contains="[Ss]hare", cols=("Abstract"), all=TRUE)
        # subset(glb_feats_df, cor.y.abs <= glb_feats_df[glb_feats_df$id == ".rnorm", "cor.y.abs"])
        # corxx_mtrx <- cor(data.matrix(glb_allobs_df[, setdiff(names(glb_allobs_df), myfind_chr_cols_df(glb_allobs_df))]), use="pairwise.complete.obs"); abs_corxx_mtrx <- abs(corxx_mtrx); diag(abs_corxx_mtrx) <- 0
        # which.max(abs_corxx_mtrx["S.T.tribun", ])
        # abs_corxx_mtrx["A.npnct08.log", "S.npnct08.log"]
        # step_glm <- step(orig_glm)
        # }
        # Since caret does not optimize rpart well
        # if (method == "rpart")
        # ret_lst <- myfit_mdl(model_id=paste0(model_id_pfx, ".cp.0"), model_method=method,
        # indep_vars_vctr=indep_vars_vctr,
        # model_type=glb_model_type,
        # rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
        # fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
        # n_cv_folds=0, tune_models_df=data.frame(parameter="cp", min=0.0, max=0.0, by=0.1))
    }
}
## label step_major step_minor bgn end elapsed
## 1 fit.models_1_bgn 1 0 96.370 96.379 0.009
## 2 fit.models_1_glm 2 0 96.379 NA NA
## [1] "fitting model: All.X.glm"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
## 354, 619, 935
## Warning: not plotting observations with leverage one:
## 354, 619, 935
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5545 -0.6184 -0.0759 0.4772 3.3084
##
## Coefficients: (57 not defined because of singularities)
## Estimate Std. Error
## (Intercept) 8.072e+02 3.499e+03
## biddable 3.142e+00 2.355e-01
## D.ratio.nstopwrds.nwrds -1.480e+01 7.692e+00
## D.npnct15.log 1.196e+00 9.929e-01
## D.npnct03.log 6.005e-01 1.800e+00
## D.terms.n.stem.stop.Ratio -7.755e+02 3.497e+03
## D.ratio.sum.TfIdf.nwrds -1.552e+00 1.278e+00
## D.npnct01.log -2.181e-01 9.103e-01
## .rnorm 1.393e-02 9.744e-02
## D.TfIdf.sum.stem.stop.Ratio -1.866e+01 2.335e+01
## storage.fctr16 -1.068e-01 5.884e-01
## storage.fctr32 -2.156e-01 6.139e-01
## storage.fctr64 4.166e-01 6.179e-01
## storage.fctrUnknown 2.144e-01 7.416e-01
## D.npnct11.log 1.743e-01 4.535e-01
## D.npnct10.log -9.990e-01 2.020e+00
## D.TfIdf.sum.post.stop -2.801e+00 3.350e+00
## D.TfIdf.sum.post.stem 3.311e+00 3.511e+00
## D.sum.TfIdf NA NA
## `prdl.my.descr.fctrUnknown#1` -9.192e-01 6.700e+00
## `prdl.my.descr.fctriPad 1#0` 7.271e-01 5.812e-01
## `prdl.my.descr.fctriPad 1#1` 6.051e-01 6.663e+00
## `prdl.my.descr.fctriPad 2#0` 1.585e+00 6.915e-01
## `prdl.my.descr.fctriPad 2#1` -2.572e-01 6.637e+00
## `prdl.my.descr.fctriPad 3+#0` 1.166e+00 5.560e-01
## `prdl.my.descr.fctriPad 3+#1` -1.127e+00 6.652e+00
## `prdl.my.descr.fctriPadAir#0` 3.253e-01 5.328e-01
## `prdl.my.descr.fctriPadAir#1` 3.825e-02 6.648e+00
## `prdl.my.descr.fctriPadmini 2+#0` 1.514e-01 5.438e-01
## `prdl.my.descr.fctriPadmini 2+#1` 8.256e-02 6.674e+00
## `prdl.my.descr.fctriPadmini#0` 4.872e-01 5.278e-01
## `prdl.my.descr.fctriPadmini#1` -7.116e-01 6.725e+00
## D.npnct13.log -4.193e-01 4.541e-01
## color.fctrGold -6.916e-02 5.613e-01
## `color.fctrSpace Gray` -1.319e-01 3.886e-01
## color.fctrUnknown -3.560e-01 2.815e-01
## color.fctrWhite -2.679e-01 3.073e-01
## D.npnct08.log 4.819e-01 8.435e-01
## D.npnct16.log 1.740e+00 2.224e+00
## D.npnct24.log NA NA
## D.nstopwrds.log 5.020e+00 2.342e+00
## D.npnct06.log -3.917e+00 2.377e+00
## D.npnct28.log -3.344e+00 3.310e+03
## D.nuppr.log -2.370e+00 5.392e+00
## D.nchrs.log 1.963e+00 6.151e+00
## D.nwrds.log -5.166e+00 3.283e+00
## D.npnct12.log -7.432e-01 8.740e-01
## carrier.fctrNone 5.695e-01 7.034e-01
## carrier.fctrOther 3.091e+01 5.595e+03
## carrier.fctrSprint 2.279e-01 7.981e-01
## `carrier.fctrT-Mobile` -6.923e-01 1.084e+00
## carrier.fctrUnknown -3.964e-02 5.369e-01
## carrier.fctrVerizon 8.074e-01 4.802e-01
## D.npnct09.log -9.223e+00 2.854e+03
## D.ndgts.log 3.259e-01 6.455e-01
## D.nwrds.unq.log 8.435e+02 3.885e+03
## D.terms.n.post.stem.log NA NA
## D.terms.n.post.stop.log -8.454e+02 3.884e+03
## cellular.fctr1 3.655e-01 6.355e-01
## cellular.fctrUnknown NA NA
## D.npnct14.log -1.486e+00 1.009e+00
## D.terms.n.post.stem -2.330e+00 2.038e+01
## D.terms.n.post.stop 2.348e+00 2.032e+01
## D.npnct05.log -2.742e+00 1.866e+00
## `condition.fctrFor parts or not working` 3.423e-01 4.096e-01
## `condition.fctrManufacturer refurbished` 6.185e-01 5.963e-01
## condition.fctrNew -3.135e-01 3.180e-01
## `condition.fctrNew other (see details)` 7.964e-01 5.189e-01
## `condition.fctrSeller refurbished` -6.440e-01 4.927e-01
## idseq.my -7.067e-05 2.192e-04
## startprice.diff -1.398e-02 1.599e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 1.085e+00 1.113e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -1.294e-01 1.119e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -1.512e-01 9.948e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 1.182e+00 8.861e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 3.162e-01 8.389e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 1.056e-01 2.085e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 9.002e-01 1.433e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 5.414e-01 1.927e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 5.560e-02 1.276e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -3.174e-01 1.343e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 3.515e-01 1.499e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -4.227e-01 1.243e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -8.845e-02 2.347e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 9.996e-01 1.384e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.764e+01 2.704e+03
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -2.491e-01 1.209e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 9.901e-01 1.112e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -1.565e+01 7.660e+02
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -9.742e-01 1.210e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.495e+01 3.956e+03
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 1.781e+01 1.556e+03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 1.723e+01 1.420e+03
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` -6.353e-02 1.119e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.672e+00 1.017e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` -1.423e+01 3.956e+03
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.985e+00 1.643e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.782e+00 1.426e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 7.513e-01 1.494e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 1.089e+00 1.582e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -1.467e+01 2.282e+03
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -4.614e-01 2.497e+00
## z value Pr(>|z|)
## (Intercept) 0.231 0.81756
## biddable 13.345 < 2e-16 ***
## D.ratio.nstopwrds.nwrds -1.924 0.05441 .
## D.npnct15.log 1.205 0.22820
## D.npnct03.log 0.334 0.73868
## D.terms.n.stem.stop.Ratio -0.222 0.82450
## D.ratio.sum.TfIdf.nwrds -1.215 0.22430
## D.npnct01.log -0.240 0.81064
## .rnorm 0.143 0.88630
## D.TfIdf.sum.stem.stop.Ratio -0.799 0.42415
## storage.fctr16 -0.182 0.85591
## storage.fctr32 -0.351 0.72549
## storage.fctr64 0.674 0.50022
## storage.fctrUnknown 0.289 0.77249
## D.npnct11.log 0.384 0.70078
## D.npnct10.log -0.494 0.62100
## D.TfIdf.sum.post.stop -0.836 0.40318
## D.TfIdf.sum.post.stem 0.943 0.34559
## D.sum.TfIdf NA NA
## `prdl.my.descr.fctrUnknown#1` -0.137 0.89089
## `prdl.my.descr.fctriPad 1#0` 1.251 0.21092
## `prdl.my.descr.fctriPad 1#1` 0.091 0.92764
## `prdl.my.descr.fctriPad 2#0` 2.292 0.02193 *
## `prdl.my.descr.fctriPad 2#1` -0.039 0.96909
## `prdl.my.descr.fctriPad 3+#0` 2.098 0.03594 *
## `prdl.my.descr.fctriPad 3+#1` -0.169 0.86549
## `prdl.my.descr.fctriPadAir#0` 0.611 0.54141
## `prdl.my.descr.fctriPadAir#1` 0.006 0.99541
## `prdl.my.descr.fctriPadmini 2+#0` 0.278 0.78069
## `prdl.my.descr.fctriPadmini 2+#1` 0.012 0.99013
## `prdl.my.descr.fctriPadmini#0` 0.923 0.35601
## `prdl.my.descr.fctriPadmini#1` -0.106 0.91573
## D.npnct13.log -0.923 0.35588
## color.fctrGold -0.123 0.90193
## `color.fctrSpace Gray` -0.340 0.73420
## color.fctrUnknown -1.265 0.20593
## color.fctrWhite -0.872 0.38329
## D.npnct08.log 0.571 0.56777
## D.npnct16.log 0.783 0.43385
## D.npnct24.log NA NA
## D.nstopwrds.log 2.143 0.03208 *
## D.npnct06.log -1.648 0.09933 .
## D.npnct28.log -0.001 0.99919
## D.nuppr.log -0.440 0.66029
## D.nchrs.log 0.319 0.74966
## D.nwrds.log -1.573 0.11562
## D.npnct12.log -0.850 0.39510
## carrier.fctrNone 0.810 0.41817
## carrier.fctrOther 0.006 0.99559
## carrier.fctrSprint 0.286 0.77521
## `carrier.fctrT-Mobile` -0.639 0.52313
## carrier.fctrUnknown -0.074 0.94114
## carrier.fctrVerizon 1.681 0.09271 .
## D.npnct09.log -0.003 0.99742
## D.ndgts.log 0.505 0.61369
## D.nwrds.unq.log 0.217 0.82810
## D.terms.n.post.stem.log NA NA
## D.terms.n.post.stop.log -0.218 0.82769
## cellular.fctr1 0.575 0.56524
## cellular.fctrUnknown NA NA
## D.npnct14.log -1.472 0.14099
## D.terms.n.post.stem -0.114 0.90897
## D.terms.n.post.stop 0.116 0.90801
## D.npnct05.log -1.470 0.14168
## `condition.fctrFor parts or not working` 0.836 0.40331
## `condition.fctrManufacturer refurbished` 1.037 0.29965
## condition.fctrNew -0.986 0.32419
## `condition.fctrNew other (see details)` 1.535 0.12484
## `condition.fctrSeller refurbished` -1.307 0.19125
## idseq.my -0.322 0.74722
## startprice.diff -8.746 < 2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 0.975 0.32964
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -0.116 0.90797
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -0.152 0.87917
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 1.334 0.18209
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 0.377 0.70623
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 0.051 0.95960
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 0.628 0.52982
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 0.281 0.77878
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 0.044 0.96525
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -0.236 0.81313
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 0.234 0.81462
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -0.340 0.73372
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -0.038 0.96994
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 0.722 0.47007
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 0.007 0.99479
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -0.206 0.83679
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 0.890 0.37346
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -0.020 0.98370
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -0.805 0.42087
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -0.004 0.99699
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 0.011 0.99087
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 0.012 0.99031
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` -0.057 0.95473
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.627 0.00862 **
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` -0.004 0.99713
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.208 0.22690
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.249 0.21150
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 0.503 0.61515
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 0.688 0.49129
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -0.006 0.99487
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -0.185 0.85337
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 735.56 on 871 degrees of freedom
## AIC: 931.56
##
## Number of Fisher Scoring iterations: 16
##
## [1] " calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7250000
## 3 0.2 0.7715079
## 4 0.3 0.8016360
## 5 0.4 0.8171806
## 6 0.5 0.8258362
## 7 0.6 0.8082027
## 8 0.7 0.7812895
## 9 0.8 0.7466307
## 10 0.9 0.5978428
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.glm.N sold.fctr.predict.All.X.glm.Y
## 1 N 460 62
## 2 Y 89 358
## Prediction
## Reference N Y
## N 460 62
## Y 89 358
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.441692e-01 6.850937e-01 8.197763e-01 8.664485e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.762753e-90 3.435757e-02
## [1] " calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6984698
## 3 0.2 0.7223340
## 4 0.3 0.7372973
## 5 0.4 0.7520185
## 6 0.5 0.7545788
## 7 0.6 0.7512821
## 8 0.7 0.7361299
## 9 0.8 0.6764706
## 10 0.9 0.4939130
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.X.glm.N sold.fctr.predict.All.X.glm.Y
## 1 N 380 97
## 2 Y 104 309
## Prediction
## Reference N Y
## N 380 97
## Y 104 309
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.741573e-01 5.454499e-01 7.452413e-01 8.012453e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 3.249328e-49 6.721440e-01
## model_id model_method
## 1 All.X.glm glm
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 2.172 0.506
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9069917 0.5 0.8258362 0.7647059
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8197763 0.8664485 0.5250728 0.8308232
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.7545788 0.7741573
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7452413 0.8012453 0.5454499 931.5575
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.0298565 0.05995988
## label step_major step_minor bgn end elapsed
## 2 fit.models_1_glm 2 0 96.379 102.276 5.897
## 3 fit.models_1_bayesglm 3 0 102.277 NA NA
## [1] "fitting model: All.X.bayesglm"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: arm
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following object is masked from 'package:tidyr':
##
## expand
##
## Loading required package: lme4
##
## arm (Version 1.8-6, built: 2015-7-7)
##
## Working directory is /Users/bbalaji-2012/Documents/Work/Courses/MIT/Analytics_Edge_15_071x/Assignments/Kaggle_eBay_iPads
## Aggregating results
## Fitting final model on full training set
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.5556 -0.6302 -0.1345 0.5021 3.2260
##
## Coefficients:
## Estimate Std. Error
## (Intercept) -4.347e+00 7.319e+00
## biddable 3.034e+00 2.212e-01
## D.ratio.nstopwrds.nwrds -2.211e+00 2.469e+00
## D.npnct15.log 1.351e+00 9.049e-01
## D.npnct03.log 4.958e-01 1.508e+00
## D.terms.n.stem.stop.Ratio 5.028e+00 5.979e+00
## D.ratio.sum.TfIdf.nwrds -6.650e-01 5.501e-01
## D.npnct01.log 1.302e-01 7.572e-01
## .rnorm 1.010e-02 9.353e-02
## D.TfIdf.sum.stem.stop.Ratio 2.588e-01 4.491e+00
## storage.fctr16 -1.628e-01 4.902e-01
## storage.fctr32 -2.673e-01 5.127e-01
## storage.fctr64 3.920e-01 5.190e-01
## storage.fctrUnknown 1.613e-01 6.285e-01
## D.npnct11.log 1.337e-01 3.826e-01
## D.npnct10.log -8.824e-01 1.786e+00
## D.TfIdf.sum.post.stop 6.645e-02 2.969e-01
## D.TfIdf.sum.post.stem 1.028e-01 3.136e-01
## D.sum.TfIdf 1.028e-01 3.136e-01
## `prdl.my.descr.fctrUnknown#1` -3.698e-01 9.146e-01
## `prdl.my.descr.fctriPad 1#0` 5.379e-01 5.099e-01
## `prdl.my.descr.fctriPad 1#1` 7.844e-01 8.904e-01
## `prdl.my.descr.fctriPad 2#0` 1.356e+00 6.123e-01
## `prdl.my.descr.fctriPad 2#1` -4.406e-02 8.439e-01
## `prdl.my.descr.fctriPad 3+#0` 9.707e-01 4.918e-01
## `prdl.my.descr.fctriPad 3+#1` -6.777e-01 8.579e-01
## `prdl.my.descr.fctriPadAir#0` 1.637e-01 4.631e-01
## `prdl.my.descr.fctriPadAir#1` 1.365e-01 8.362e-01
## `prdl.my.descr.fctriPadmini 2+#0` -1.747e-03 4.773e-01
## `prdl.my.descr.fctriPadmini 2+#1` 1.615e-01 1.053e+00
## `prdl.my.descr.fctriPadmini#0` 3.233e-01 4.610e-01
## `prdl.my.descr.fctriPadmini#1` 2.295e-01 9.325e-01
## D.npnct13.log -2.517e-01 3.697e-01
## color.fctrGold 1.512e-03 5.147e-01
## `color.fctrSpace Gray` -1.545e-01 3.625e-01
## color.fctrUnknown -2.979e-01 2.641e-01
## color.fctrWhite -2.391e-01 2.882e-01
## D.npnct08.log 3.386e-01 8.009e-01
## D.npnct16.log 1.021e+00 1.873e+00
## D.npnct24.log 3.799e-01 2.625e+00
## D.nstopwrds.log 4.638e-01 6.751e-01
## D.npnct06.log -2.869e+00 2.005e+00
## D.npnct28.log -6.145e-02 2.225e+00
## D.nuppr.log -1.093e-01 5.010e-01
## D.nchrs.log -5.805e-02 4.869e-01
## D.nwrds.log -1.877e-01 7.920e-01
## D.npnct12.log -7.628e-01 8.262e-01
## carrier.fctrNone 2.354e-01 1.161e+00
## carrier.fctrOther 6.332e-01 1.954e+00
## carrier.fctrSprint 1.436e-01 7.235e-01
## `carrier.fctrT-Mobile` -5.694e-01 9.226e-01
## carrier.fctrUnknown -1.200e-01 4.841e-01
## carrier.fctrVerizon 7.147e-01 4.353e-01
## D.npnct09.log -2.140e+00 7.211e+00
## D.ndgts.log 4.273e-01 4.240e-01
## D.nwrds.unq.log -1.891e-01 1.025e+00
## D.terms.n.post.stem.log -1.891e-01 1.025e+00
## D.terms.n.post.stop.log -1.910e-01 1.022e+00
## cellular.fctr1 6.424e-02 1.152e+00
## cellular.fctrUnknown -3.051e-01 1.198e+00
## D.npnct14.log -1.273e+00 8.973e-01
## D.terms.n.post.stem -6.700e-02 1.983e-01
## D.terms.n.post.stop -7.456e-02 1.963e-01
## D.npnct05.log -2.540e+00 1.456e+00
## `condition.fctrFor parts or not working` 3.891e-01 3.820e-01
## `condition.fctrManufacturer refurbished` 5.998e-01 5.553e-01
## condition.fctrNew -3.150e-01 3.061e-01
## `condition.fctrNew other (see details)` 7.596e-01 4.706e-01
## `condition.fctrSeller refurbished` -5.317e-01 4.469e-01
## idseq.my -9.664e-05 2.094e-04
## startprice.diff -1.343e-02 1.506e-03
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 8.178e-01 8.938e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -1.855e-01 9.086e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -5.915e-03 8.168e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 8.008e-01 7.351e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 2.902e-01 7.110e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 2.989e-01 1.294e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` -5.684e-03 1.035e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 3.285e-01 1.393e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 9.773e-02 1.006e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -3.715e-01 1.055e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 7.797e-02 1.128e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -4.728e-01 1.024e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 2.118e-01 1.466e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 5.720e-02 9.629e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.775e+00 1.620e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -4.368e-02 9.571e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 7.630e-01 9.247e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -1.551e+00 1.582e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -6.328e-01 9.515e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -1.853e-01 2.163e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 2.155e+00 1.635e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 2.208e+00 1.643e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` -1.996e-01 9.382e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.387e+00 8.443e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 2.750e-01 1.769e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.035e+00 1.160e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.088e+00 1.076e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 3.753e-01 1.151e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 3.619e-01 1.096e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -7.199e-01 1.752e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 0.000e+00 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -3.962e-01 1.402e+00
## z value Pr(>|z|)
## (Intercept) -0.594 0.5525
## biddable 13.712 <2e-16 ***
## D.ratio.nstopwrds.nwrds -0.895 0.3707
## D.npnct15.log 1.494 0.1353
## D.npnct03.log 0.329 0.7423
## D.terms.n.stem.stop.Ratio 0.841 0.4004
## D.ratio.sum.TfIdf.nwrds -1.209 0.2267
## D.npnct01.log 0.172 0.8635
## .rnorm 0.108 0.9140
## D.TfIdf.sum.stem.stop.Ratio 0.058 0.9541
## storage.fctr16 -0.332 0.7398
## storage.fctr32 -0.521 0.6021
## storage.fctr64 0.755 0.4501
## storage.fctrUnknown 0.257 0.7974
## D.npnct11.log 0.349 0.7268
## D.npnct10.log -0.494 0.6214
## D.TfIdf.sum.post.stop 0.224 0.8229
## D.TfIdf.sum.post.stem 0.328 0.7431
## D.sum.TfIdf 0.328 0.7431
## `prdl.my.descr.fctrUnknown#1` -0.404 0.6860
## `prdl.my.descr.fctriPad 1#0` 1.055 0.2915
## `prdl.my.descr.fctriPad 1#1` 0.881 0.3784
## `prdl.my.descr.fctriPad 2#0` 2.215 0.0268 *
## `prdl.my.descr.fctriPad 2#1` -0.052 0.9584
## `prdl.my.descr.fctriPad 3+#0` 1.974 0.0484 *
## `prdl.my.descr.fctriPad 3+#1` -0.790 0.4295
## `prdl.my.descr.fctriPadAir#0` 0.353 0.7238
## `prdl.my.descr.fctriPadAir#1` 0.163 0.8703
## `prdl.my.descr.fctriPadmini 2+#0` -0.004 0.9971
## `prdl.my.descr.fctriPadmini 2+#1` 0.153 0.8781
## `prdl.my.descr.fctriPadmini#0` 0.701 0.4832
## `prdl.my.descr.fctriPadmini#1` 0.246 0.8056
## D.npnct13.log -0.681 0.4959
## color.fctrGold 0.003 0.9977
## `color.fctrSpace Gray` -0.426 0.6700
## color.fctrUnknown -1.128 0.2592
## color.fctrWhite -0.830 0.4067
## D.npnct08.log 0.423 0.6725
## D.npnct16.log 0.545 0.5857
## D.npnct24.log 0.145 0.8849
## D.nstopwrds.log 0.687 0.4921
## D.npnct06.log -1.431 0.1525
## D.npnct28.log -0.028 0.9780
## D.nuppr.log -0.218 0.8273
## D.nchrs.log -0.119 0.9051
## D.nwrds.log -0.237 0.8127
## D.npnct12.log -0.923 0.3558
## carrier.fctrNone 0.203 0.8394
## carrier.fctrOther 0.324 0.7459
## carrier.fctrSprint 0.199 0.8426
## `carrier.fctrT-Mobile` -0.617 0.5372
## carrier.fctrUnknown -0.248 0.8042
## carrier.fctrVerizon 1.642 0.1006
## D.npnct09.log -0.297 0.7666
## D.ndgts.log 1.008 0.3135
## D.nwrds.unq.log -0.185 0.8535
## D.terms.n.post.stem.log -0.185 0.8535
## D.terms.n.post.stop.log -0.187 0.8517
## cellular.fctr1 0.056 0.9555
## cellular.fctrUnknown -0.255 0.7989
## D.npnct14.log -1.418 0.1561
## D.terms.n.post.stem -0.338 0.7354
## D.terms.n.post.stop -0.380 0.7041
## D.npnct05.log -1.745 0.0810 .
## `condition.fctrFor parts or not working` 1.019 0.3084
## `condition.fctrManufacturer refurbished` 1.080 0.2801
## condition.fctrNew -1.029 0.3034
## `condition.fctrNew other (see details)` 1.614 0.1065
## `condition.fctrSeller refurbished` -1.190 0.2341
## idseq.my -0.462 0.6444
## startprice.diff -8.916 <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 0.915 0.3602
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -0.204 0.8383
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` -0.007 0.9942
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 1.089 0.2760
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 0.408 0.6831
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 0.231 0.8174
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` -0.005 0.9956
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 0.236 0.8136
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 0.097 0.9226
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -0.352 0.7248
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 0.069 0.9449
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -0.462 0.6441
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 0.145 0.8851
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 0.059 0.9526
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.095 0.2734
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -0.046 0.9636
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 0.825 0.4093
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -0.981 0.3268
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -0.665 0.5060
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -0.086 0.9317
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 1.318 0.1875
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 1.344 0.1789
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` -0.213 0.8315
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.827 0.0047 **
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 0.155 0.8765
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 0.893 0.3721
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.011 0.3119
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 0.326 0.7444
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 0.330 0.7412
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -0.411 0.6812
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 0.000 1.0000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -0.283 0.7775
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 746.68 on 814 degrees of freedom
## AIC: 1056.7
##
## Number of Fisher Scoring iterations: 18
##
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7241094
## 3 0.2 0.7644363
## 4 0.3 0.8008172
## 5 0.4 0.8105148
## 6 0.5 0.8175520
## 7 0.6 0.8029021
## 8 0.7 0.7888748
## 9 0.8 0.7449393
## 10 0.9 0.5495208
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.bayesglm.N
## 1 N 457
## 2 Y 93
## sold.fctr.predict.All.X.bayesglm.Y
## 1 65
## 2 354
## Prediction
## Reference N Y
## N 457 65
## Y 93 354
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.369453e-01 6.704422e-01 8.121533e-01 8.596691e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 6.718903e-86 3.171338e-02
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6983289
## 3 0.2 0.7231527
## 4 0.3 0.7378641
## 5 0.4 0.7551963
## 6 0.5 0.7641278
## 7 0.6 0.7593308
## 8 0.7 0.7472826
## 9 0.8 0.6775148
## 10 0.9 0.4436364
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.X.bayesglm.N
## 1 N 387
## 2 Y 102
## sold.fctr.predict.All.X.bayesglm.Y
## 1 90
## 2 311
## Prediction
## Reference N Y
## N 387 90
## Y 102 311
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.842697e-01 5.654496e-01 7.557654e-01 8.108777e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.381223e-53 4.272789e-01
## model_id model_method
## 1 All.X.bayesglm bayesglm
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 3.299 0.772
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9042146 0.5 0.817552 0.7770898
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8121533 0.8596691 0.5506703 0.8427064
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.7641278 0.7842697
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7557654 0.8108777 0.5654496 1056.676
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03447532 0.06919184
## label step_major step_minor bgn end elapsed
## 3 fit.models_1_bayesglm 3 0 102.277 108.717 6.44
## 4 fit.models_1_glmnet 4 0 108.718 NA NA
## [1] "fitting model: All.X.glmnet"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: glmnet
## Loaded glmnet 2.0-2
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0559 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda
## Length Class Mode
## a0 94 -none- numeric
## beta 14476 dgCMatrix S4
## df 94 -none- numeric
## dim 2 -none- numeric
## lambda 94 -none- numeric
## dev.ratio 94 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 154 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept) biddable
## -0.899322561 2.001584168
## prdl.my.descr.fctriPad 2#0 prdl.my.descr.fctriPad 3+#0
## 0.290095210 0.105586993
## D.terms.n.post.stem D.terms.n.post.stop
## -0.001144834 -0.006953112
## D.npnct05.log condition.fctrNew
## -0.404659882 -0.113024355
## startprice.diff
## -0.005271528
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## 2.036899e+00
## biddable
## 3.108468e+00
## D.ratio.nstopwrds.nwrds
## -7.610486e+00
## D.npnct15.log
## 1.337715e+00
## D.npnct03.log
## 5.644884e-01
## D.terms.n.stem.stop.Ratio
## 3.877586e+00
## D.ratio.sum.TfIdf.nwrds
## -9.955486e-01
## D.npnct01.log
## -6.912816e-02
## .rnorm
## 1.200630e-02
## D.TfIdf.sum.stem.stop.Ratio
## 2.583627e-01
## storage.fctr16
## -1.492889e-01
## storage.fctr32
## -2.538723e-01
## storage.fctr64
## 3.898066e-01
## storage.fctrUnknown
## 2.109165e-01
## D.npnct11.log
## 1.336271e-01
## D.npnct10.log
## -1.008654e+00
## D.TfIdf.sum.post.stem
## 1.740092e-01
## D.sum.TfIdf
## 1.604829e-01
## prdl.my.descr.fctrUnknown#1
## -7.484133e-01
## prdl.my.descr.fctriPad 1#0
## 7.248021e-01
## prdl.my.descr.fctriPad 1#1
## 7.365368e-01
## prdl.my.descr.fctriPad 2#0
## 1.600105e+00
## prdl.my.descr.fctriPad 3+#0
## 1.164531e+00
## prdl.my.descr.fctriPad 3+#1
## -8.444403e-01
## prdl.my.descr.fctriPadAir#0
## 3.271335e-01
## prdl.my.descr.fctriPadAir#1
## 2.013535e-01
## prdl.my.descr.fctriPadmini 2+#0
## 1.593440e-01
## prdl.my.descr.fctriPadmini 2+#1
## 2.251656e-01
## prdl.my.descr.fctriPadmini#0
## 4.908239e-01
## prdl.my.descr.fctriPadmini#1
## -3.052192e-01
## D.npnct13.log
## -3.497295e-01
## color.fctrGold
## -3.755518e-02
## color.fctrSpace Gray
## -1.279132e-01
## color.fctrUnknown
## -3.204201e-01
## color.fctrWhite
## -2.602917e-01
## D.npnct08.log
## 4.963979e-01
## D.npnct16.log
## 1.398245e+00
## D.nstopwrds.log
## 2.185585e+00
## D.npnct06.log
## -3.485130e+00
## D.npnct28.log
## -1.526272e+00
## D.nuppr.log
## -5.352202e-01
## D.nchrs.log
## -1.121831e-04
## D.nwrds.log
## -1.475618e+00
## D.npnct12.log
## -7.092382e-01
## carrier.fctrNone
## 1.997221e-01
## carrier.fctrOther
## 7.534329e+00
## carrier.fctrSprint
## 1.776846e-01
## carrier.fctrT-Mobile
## -7.284659e-01
## carrier.fctrUnknown
## -6.448419e-02
## carrier.fctrVerizon
## 7.545986e-01
## D.npnct09.log
## -1.853972e+00
## D.ndgts.log
## 4.390575e-01
## D.nwrds.unq.log
## -1.757319e-01
## D.terms.n.post.stem.log
## -1.348930e-01
## D.terms.n.post.stop.log
## -7.030408e-01
## cellular.fctrUnknown
## -3.520470e-01
## D.npnct14.log
## -1.324894e+00
## D.terms.n.post.stop
## -1.318643e-01
## D.npnct05.log
## -3.001452e+00
## condition.fctrFor parts or not working
## 3.717266e-01
## condition.fctrManufacturer refurbished
## 6.531141e-01
## condition.fctrNew
## -3.114269e-01
## condition.fctrNew other (see details)
## 7.851594e-01
## condition.fctrSeller refurbished
## -5.922203e-01
## idseq.my
## -8.380108e-05
## startprice.diff
## -1.378775e-02
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr2
## 1.219070e+00
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr2
## -1.429967e-01
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr2
## -1.283517e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2
## 1.085719e+00
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr2
## 3.584049e-01
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2
## 4.109503e-01
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr2
## 6.237503e-01
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr3
## 7.536223e-01
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr3
## 2.338335e-01
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr3
## -4.454604e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3
## 3.337723e-01
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr3
## -6.278645e-01
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3
## 2.062872e-01
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr3
## 5.957491e-01
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr4
## 6.921266e+00
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr4
## -3.873423e-02
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr4
## 9.641785e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4
## -5.028161e+00
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr4
## -8.400228e-01
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4
## -3.445745e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr4
## 7.144367e+00
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr5
## 6.830081e+00
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr5
## -1.525406e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5
## 2.753577e+00
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr5
## -2.200136e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr5
## 1.902956e+00
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr6
## -1.588487e+00
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6
## 7.311634e-01
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr6
## 9.893509e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7
## -4.388981e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr7
## -6.657104e-02
## character(0)
## character(0)
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6412653
## 3 0.2 0.6851424
## 4 0.3 0.7174542
## 5 0.4 0.7514061
## 6 0.5 0.7771295
## 7 0.6 0.8029021
## 8 0.7 0.7994859
## 9 0.8 0.1825558
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.glmnet.N
## 1 N 474
## 2 Y 115
## sold.fctr.predict.All.X.glmnet.Y
## 1 48
## 2 332
## Prediction
## Reference N Y
## N 474 48
## Y 115 332
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.317853e-01 6.578590e-01 8.067182e-01 8.548166e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.002400e-82 2.346973e-07
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6418026
## 3 0.2 0.6821192
## 4 0.3 0.7079832
## 5 0.4 0.7440191
## 6 0.5 0.7627329
## 7 0.6 0.7932817
## 8 0.7 0.7956104
## 9 0.8 0.1596452
## 10 0.9 0.0000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.7000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.X.glmnet.N
## 1 N 451
## 2 Y 123
## sold.fctr.predict.All.X.glmnet.Y
## 1 26
## 2 290
## Prediction
## Reference N Y
## N 451 26
## Y 123 290
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.325843e-01 6.580401e-01 8.064031e-01 8.565410e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 7.704832e-78 3.702005e-15
## model_id model_method
## 1 All.X.glmnet glmnet
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 9 6.992 1.523
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8677904 0.6 0.8029021 0.8008256
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8067182 0.8548166 0.59697 0.8560007
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.7 0.7956104 0.8325843
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8064031 0.856541 0.6580401
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03693608 0.0748364
## label step_major step_minor bgn end elapsed
## 4 fit.models_1_glmnet 4 0 108.718 119.617 10.899
## 5 fit.models_1_rpart 5 0 119.617 NA NA
## [1] "fitting model: All.X.no.rnorm.rpart"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00671 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 969
##
## CP nsplit rel error
## 1 0.527964206 0 1.0000000
## 2 0.134228188 1 0.4720358
## 3 0.006711409 2 0.3378076
##
## Variable importance
## biddable
## 51
## startprice.diff
## 28
## idseq.my
## 8
## prdl.my.descr.fctriPad 3+#0
## 3
## prdl.my.descr.fctriPad 2#0
## 3
## condition.fctrFor parts or not working
## 2
## prdl.my.descr.fctriPad 1#0
## 2
## color.fctrGold
## 1
##
## Node number 1: 969 observations, complexity param=0.5279642
## predicted class=N expected loss=0.4613003 P(node) =1
## class counts: 522 447
## probabilities: 0.539 0.461
## left son=2 (539 obs) right son=3 (430 obs)
## Primary splits:
## biddable < 0.5 to the left, improve=151.58290, (0 missing)
## startprice.diff < 62.89456 to the right, improve= 82.96307, (0 missing)
## idseq.my < 876.5 to the right, improve= 37.84375, (0 missing)
## condition.fctrNew < 0.5 to the right, improve= 16.22579, (0 missing)
## prdl.my.descr.fctriPad 2#0 < 0.5 to the left, improve= 13.28426, (0 missing)
## Surrogate splits:
## idseq.my < 798 to the right, agree=0.628, adj=0.163, (0 split)
## prdl.my.descr.fctriPad 3+#0 < 0.5 to the left, agree=0.586, adj=0.067, (0 split)
## prdl.my.descr.fctriPad 2#0 < 0.5 to the left, agree=0.579, adj=0.051, (0 split)
## condition.fctrFor parts or not working < 0.5 to the left, agree=0.578, adj=0.049, (0 split)
## prdl.my.descr.fctriPad 1#0 < 0.5 to the left, agree=0.573, adj=0.037, (0 split)
##
## Node number 2: 539 observations
## predicted class=N expected loss=0.2115028 P(node) =0.5562436
## class counts: 425 114
## probabilities: 0.788 0.212
##
## Node number 3: 430 observations, complexity param=0.1342282
## predicted class=Y expected loss=0.2255814 P(node) =0.4437564
## class counts: 97 333
## probabilities: 0.226 0.774
## left son=6 (80 obs) right son=7 (350 obs)
## Primary splits:
## startprice.diff < 63.51092 to the right, improve=82.902920, (0 missing)
## idseq.my < 893.5 to the right, improve=15.999440, (0 missing)
## cellular.fctrUnknown < 0.5 to the right, improve= 3.057989, (0 missing)
## prdl.my.descr.fctriPad 2#0 < 0.5 to the left, improve= 2.726027, (0 missing)
## condition.fctrNew < 0.5 to the right, improve= 2.683363, (0 missing)
## Surrogate splits:
## color.fctrGold < 0.5 to the right, agree=0.819, adj=0.025, (0 split)
## D.ratio.nstopwrds.nwrds < 0.1380952 to the left, agree=0.816, adj=0.013, (0 split)
## D.nwrds.unq.log < 2.602003 to the right, agree=0.816, adj=0.013, (0 split)
## D.terms.n.post.stem.log < 2.602003 to the right, agree=0.816, adj=0.013, (0 split)
## D.terms.n.post.stop.log < 2.602003 to the right, agree=0.816, adj=0.013, (0 split)
##
## Node number 6: 80 observations
## predicted class=N expected loss=0.125 P(node) =0.08255934
## class counts: 70 10
## probabilities: 0.875 0.125
##
## Node number 7: 350 observations
## predicted class=Y expected loss=0.07714286 P(node) =0.3611971
## class counts: 27 323
## probabilities: 0.077 0.923
##
## n= 969
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 969 447 N (0.53869969 0.46130031)
## 2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
## 3) biddable>=0.5 430 97 Y (0.22558140 0.77441860)
## 6) startprice.diff>=63.51092 80 10 N (0.87500000 0.12500000) *
## 7) startprice.diff< 63.51092 350 27 Y (0.07714286 0.92285714) *
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6313559
## 3 0.2 0.6541916
## 4 0.3 0.8105395
## 5 0.4 0.8105395
## 6 0.5 0.8105395
## 7 0.6 0.8105395
## 8 0.7 0.8105395
## 9 0.8 0.8105395
## 10 0.9 0.8105395
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.no.rnorm.rpart.N
## 1 N 495
## 2 Y 124
## sold.fctr.predict.All.X.no.rnorm.rpart.Y
## 1 27
## 2 323
## Prediction
## Reference N Y
## N 495 27
## Y 124 323
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.441692e-01 6.814949e-01 8.197763e-01 8.664485e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.762753e-90 5.612287e-15
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6339217
## 3 0.2 0.6633907
## 4 0.3 0.8102981
## 5 0.4 0.8102981
## 6 0.5 0.8102981
## 7 0.6 0.8102981
## 8 0.7 0.8102981
## 9 0.8 0.8102981
## 10 0.9 0.8102981
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.X.no.rnorm.rpart.N
## 1 N 451
## 2 Y 114
## sold.fctr.predict.All.X.no.rnorm.rpart.Y
## 1 26
## 2 299
## Prediction
## Reference N Y
## N 451 26
## Y 114 299
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.426966e-01 6.791719e-01 8.170871e-01 8.660125e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.090657e-83 1.940362e-13
## model_id model_method
## 1 All.X.no.rnorm.rpart rpart
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 1.834 0.096
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8434283 0.9 0.8105395 0.8338493
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8197763 0.8664485 0.6645079 0.8469855
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.8102981 0.8426966
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8170871 0.8660125 0.6791719
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.008937311 0.01629107
## label step_major step_minor bgn end elapsed
## 5 fit.models_1_rpart 5 0 119.617 125.076 5.459
## 6 fit.models_1_rf 6 0 125.076 NA NA
## [1] "fitting model: All.X.no.rnorm.rf"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Loading required package: randomForest
## randomForest 4.6-10
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:gdata':
##
## combine
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 77 on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 969 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 1938 matrix numeric
## oob.times 969 -none- numeric
## classes 2 -none- character
## importance 153 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 969 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 153 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.8579655
## 3 0.2 0.9520767
## 4 0.3 0.9781182
## 5 0.4 0.9988827
## 6 0.5 1.0000000
## 7 0.6 1.0000000
## 8 0.7 0.9652778
## 9 0.8 0.8935644
## 10 0.9 0.8111702
## 11 1.0 0.2807692
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.no.rnorm.rf.N
## 1 N 522
## 2 Y NA
## sold.fctr.predict.All.X.no.rnorm.rf.Y
## 1 NA
## 2 447
## Prediction
## Reference N Y
## N 522 0
## Y 0 447
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.000000e+00 1.000000e+00 9.962003e-01 1.000000e+00 5.386997e-01
## AccuracyPValue McnemarPValue
## 4.731267e-261 NaN
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.7527675
## 3 0.2 0.7930328
## 4 0.3 0.8031146
## 5 0.4 0.8169014
## 6 0.5 0.8294479
## 7 0.6 0.8209719
## 8 0.7 0.8156124
## 9 0.8 0.7685714
## 10 0.9 0.6645669
## 11 1.0 0.1562500
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.X.no.rnorm.rf.N
## 1 N 413
## 2 Y 75
## sold.fctr.predict.All.X.no.rnorm.rf.Y
## 1 64
## 2 338
## Prediction
## Reference N Y
## N 413 64
## Y 75 338
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.438202e-01 6.854548e-01 8.182763e-01 8.670627e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 2.342835e-84 3.963328e-01
## model_id model_method
## 1 All.X.no.rnorm.rf rf
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 19.691 7.227
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.6 1 0.8482972
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9962003 1 0.6925622 0.9180131
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.8294479 0.8438202
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8182763 0.8670627 0.6854548
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.008191181 0.01601525
## label step_major step_minor bgn end elapsed
## 6 fit.models_1_rf 6 0 125.076 148.261 23.185
## 7 fit.models_1_glm 7 0 148.261 NA NA
## [1] "fitting model: All.Interact.X.glm"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
## Warning: glm.fit: algorithm did not converge
## Warning: glm.fit: fitted probabilities numerically 0 or 1 occurred
## Warning: not plotting observations with leverage one:
## 306, 346, 354, 619, 643, 935, 939
## Warning: not plotting observations with leverage one:
## 306, 346, 354, 619, 643, 935, 939
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
## Warning in sqrt(crit * p * (1 - hh)/hh): NaNs produced
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -8.49 0.00 0.00 0.00 8.49
##
## Coefficients: (115 not defined because of singularities)
## Estimate
## (Intercept) -1.949e+18
## D.ratio.nstopwrds.nwrds -7.914e+15
## D.terms.n.stem.stop.Ratio 1.946e+18
## D.npnct01.log -7.371e+13
## .rnorm -2.269e+13
## storage.fctr16 -1.048e+14
## storage.fctr32 -5.766e+13
## storage.fctr64 2.492e+14
## storage.fctrUnknown -1.001e+14
## D.npnct11.log 2.205e+14
## D.npnct10.log -1.547e+15
## D.TfIdf.sum.post.stop 1.832e+15
## D.TfIdf.sum.post.stem -1.463e+15
## D.sum.TfIdf NA
## `prdl.my.descr.fctrUnknown#1` -2.216e+16
## `prdl.my.descr.fctriPad 1#0` -3.201e+14
## `prdl.my.descr.fctriPad 1#1` 6.959e+15
## `prdl.my.descr.fctriPad 2#0` 9.460e+14
## `prdl.my.descr.fctriPad 2#1` 1.726e+16
## `prdl.my.descr.fctriPad 3+#0` 5.188e+14
## `prdl.my.descr.fctriPad 3+#1` 8.052e+14
## `prdl.my.descr.fctriPadAir#0` -7.874e+13
## `prdl.my.descr.fctriPadAir#1` 9.935e+14
## `prdl.my.descr.fctriPadmini 2+#0` -3.994e+14
## `prdl.my.descr.fctriPadmini 2+#1` 1.116e+16
## `prdl.my.descr.fctriPadmini#0` 5.299e+14
## `prdl.my.descr.fctriPadmini#1` -2.064e+15
## D.npnct13.log -2.577e+14
## color.fctrGold -2.592e+14
## `color.fctrSpace Gray` -5.731e+14
## color.fctrUnknown -3.102e+14
## color.fctrWhite -2.727e+14
## D.npnct08.log -4.271e+14
## D.npnct16.log 5.412e+14
## D.npnct24.log NA
## D.nstopwrds.log 1.777e+15
## D.npnct06.log -1.699e+15
## D.npnct28.log 1.187e+15
## D.nuppr.log 1.350e+15
## D.npnct12.log -4.147e+14
## D.npnct09.log -1.996e+15
## D.ndgts.log 2.058e+14
## D.nwrds.unq.log -2.159e+18
## D.terms.n.post.stem.log NA
## D.terms.n.post.stop.log 2.154e+18
## D.npnct14.log -9.199e+13
## D.terms.n.post.stem 1.115e+16
## D.terms.n.post.stop -1.089e+16
## D.npnct05.log -1.707e+15
## `condition.fctrFor parts or not working` -7.520e+13
## `condition.fctrManufacturer refurbished` 8.492e+13
## condition.fctrNew -1.198e+14
## `condition.fctrNew other (see details)` 3.110e+14
## `condition.fctrSeller refurbished` -3.901e+14
## idseq.my -5.154e+11
## D.ratio.sum.TfIdf.nwrds 1.085e+15
## D.TfIdf.sum.stem.stop.Ratio 1.204e+16
## D.npnct15.log -8.930e+14
## D.npnct03.log 6.038e+13
## D.nwrds.log 1.008e+15
## D.nchrs.log -1.931e+15
## startprice.diff -4.156e+12
## biddable 1.107e+15
## cellular.fctr1 -1.068e+14
## cellular.fctrUnknown -3.213e+13
## carrier.fctrNone NA
## carrier.fctrOther 6.655e+15
## carrier.fctrSprint 3.488e+14
## `carrier.fctrT-Mobile` -7.666e+13
## carrier.fctrUnknown -1.190e+14
## carrier.fctrVerizon 3.237e+14
## `prdl.my.descr.fctrUnknown#1:idseq.my` 2.317e+12
## `prdl.my.descr.fctriPad 1#0:idseq.my` 5.716e+11
## `prdl.my.descr.fctriPad 1#1:idseq.my` 8.043e+11
## `prdl.my.descr.fctriPad 2#0:idseq.my` 1.894e+11
## `prdl.my.descr.fctriPad 2#1:idseq.my` 1.308e+11
## `prdl.my.descr.fctriPad 3+#0:idseq.my` 3.178e+11
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 5.863e+11
## `prdl.my.descr.fctriPadAir#0:idseq.my` 5.262e+11
## `prdl.my.descr.fctriPadAir#1:idseq.my` 9.690e+11
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 3.645e+11
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 1.815e+11
## `prdl.my.descr.fctriPadmini#0:idseq.my` 2.030e+11
## `prdl.my.descr.fctriPadmini#1:idseq.my` 2.471e+11
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` -1.543e+15
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` -7.686e+14
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` -3.397e+15
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` -3.023e+15
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` -2.267e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` -2.807e+15
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 2.284e+16
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` -1.704e+15
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` -1.245e+16
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` -8.177e+14
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 1.134e+14
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` 2.137e+16
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 1.789e+15
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 9.363e+14
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` 1.234e+15
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` -1.227e+15
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` -1.640e+15
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` -1.703e+15
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 1.582e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 1.310e+16
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` 2.585e+15
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 3.350e+15
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` -1.273e+14
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` -3.288e+15
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` -1.576e+15
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 2.244e+16
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` -2.280e+15
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` -3.416e+15
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` -1.154e+15
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 1.916e+15
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` 5.694e+14
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` -2.087e+16
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` NA
## `startprice.diff:biddable` -1.179e+13
## `cellular.fctr1:carrier.fctrNone` NA
## `cellular.fctrUnknown:carrier.fctrNone` NA
## `cellular.fctr1:carrier.fctrOther` NA
## `cellular.fctrUnknown:carrier.fctrOther` NA
## `cellular.fctr1:carrier.fctrSprint` NA
## `cellular.fctrUnknown:carrier.fctrSprint` NA
## `cellular.fctr1:carrier.fctrT-Mobile` NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile` NA
## `cellular.fctr1:carrier.fctrUnknown` NA
## `cellular.fctrUnknown:carrier.fctrUnknown` NA
## `cellular.fctr1:carrier.fctrVerizon` NA
## `cellular.fctrUnknown:carrier.fctrVerizon` NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 9.529e+13
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -6.143e+14
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 8.278e+14
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 2.097e+13
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` -2.463e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 1.036e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 5.563e+14
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` -1.431e+14
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` -4.879e+13
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -1.209e+14
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 2.561e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -6.053e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -2.486e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 1.338e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 2.887e+15
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 1.413e+14
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 1.074e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -1.298e+15
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -9.170e+14
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -4.855e+15
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 2.876e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 3.111e+15
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 2.088e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 1.528e+15
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` -2.634e+15
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.399e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -1.282e+15
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` -1.249e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 1.627e+15
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -1.306e+14
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` 5.512e+14
## Std. Error
## (Intercept) 8.552e+10
## D.ratio.nstopwrds.nwrds 1.734e+08
## D.terms.n.stem.stop.Ratio 8.548e+10
## D.npnct01.log 1.717e+07
## .rnorm 2.353e+06
## storage.fctr16 1.169e+07
## storage.fctr32 1.253e+07
## storage.fctr64 1.220e+07
## storage.fctrUnknown 1.658e+07
## D.npnct11.log 1.045e+07
## D.npnct10.log 6.406e+07
## D.TfIdf.sum.post.stop 9.198e+07
## D.TfIdf.sum.post.stem 9.570e+07
## D.sum.TfIdf NA
## `prdl.my.descr.fctrUnknown#1` 9.758e+08
## `prdl.my.descr.fctriPad 1#0` 2.668e+07
## `prdl.my.descr.fctriPad 1#1` 4.883e+08
## `prdl.my.descr.fctriPad 2#0` 2.945e+07
## `prdl.my.descr.fctriPad 2#1` 5.159e+08
## `prdl.my.descr.fctriPad 3+#0` 2.679e+07
## `prdl.my.descr.fctriPad 3+#1` 4.493e+08
## `prdl.my.descr.fctriPadAir#0` 2.719e+07
## `prdl.my.descr.fctriPadAir#1` 4.723e+08
## `prdl.my.descr.fctriPadmini 2+#0` 2.893e+07
## `prdl.my.descr.fctriPadmini 2+#1` 8.885e+08
## `prdl.my.descr.fctriPadmini#0` 2.750e+07
## `prdl.my.descr.fctriPadmini#1` 2.667e+08
## D.npnct13.log 1.160e+07
## color.fctrGold 1.311e+07
## `color.fctrSpace Gray` 9.452e+06
## color.fctrUnknown 6.921e+06
## color.fctrWhite 7.278e+06
## D.npnct08.log 2.038e+07
## D.npnct16.log 6.772e+07
## D.npnct24.log NA
## D.nstopwrds.log 4.786e+07
## D.npnct06.log 7.054e+07
## D.npnct28.log 7.155e+07
## D.nuppr.log 1.442e+08
## D.npnct12.log 2.026e+07
## D.npnct09.log 5.427e+07
## D.ndgts.log 1.452e+07
## D.nwrds.unq.log 9.505e+10
## D.terms.n.post.stem.log NA
## D.terms.n.post.stop.log 9.503e+10
## D.npnct14.log 2.119e+07
## D.terms.n.post.stem 5.040e+08
## D.terms.n.post.stop 5.020e+08
## D.npnct05.log 3.595e+07
## `condition.fctrFor parts or not working` 1.031e+07
## `condition.fctrManufacturer refurbished` 1.434e+07
## condition.fctrNew 7.893e+06
## `condition.fctrNew other (see details)` 1.154e+07
## `condition.fctrSeller refurbished` 1.141e+07
## idseq.my 1.751e+04
## D.ratio.sum.TfIdf.nwrds 5.714e+07
## D.TfIdf.sum.stem.stop.Ratio 6.741e+08
## D.npnct15.log 1.034e+08
## D.npnct03.log 5.880e+07
## D.nwrds.log 1.172e+08
## D.nchrs.log 1.876e+08
## startprice.diff 3.333e+04
## biddable 5.549e+06
## cellular.fctr1 7.773e+06
## cellular.fctrUnknown 1.675e+07
## carrier.fctrNone NA
## carrier.fctrOther 1.238e+08
## carrier.fctrSprint 1.891e+07
## `carrier.fctrT-Mobile` 2.505e+07
## carrier.fctrUnknown 1.193e+07
## carrier.fctrVerizon 1.112e+07
## `prdl.my.descr.fctrUnknown#1:idseq.my` 4.053e+04
## `prdl.my.descr.fctriPad 1#0:idseq.my` 2.345e+04
## `prdl.my.descr.fctriPad 1#1:idseq.my` 2.560e+04
## `prdl.my.descr.fctriPad 2#0:idseq.my` 2.670e+04
## `prdl.my.descr.fctriPad 2#1:idseq.my` 2.401e+04
## `prdl.my.descr.fctriPad 3+#0:idseq.my` 2.235e+04
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 2.304e+04
## `prdl.my.descr.fctriPadAir#0:idseq.my` 2.178e+04
## `prdl.my.descr.fctriPadAir#1:idseq.my` 2.359e+04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 2.319e+04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 5.038e+04
## `prdl.my.descr.fctriPadmini#0:idseq.my` 2.152e+04
## `prdl.my.descr.fctriPadmini#1:idseq.my` 2.662e+04
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` 5.944e+07
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` 6.659e+07
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` 7.203e+07
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` 5.964e+07
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` 7.046e+07
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` 3.132e+08
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 8.688e+08
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` 3.992e+08
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` 4.373e+08
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` 3.828e+08
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 4.121e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` 5.166e+08
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 1.125e+08
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 1.525e+08
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` 1.099e+08
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` 6.480e+07
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` 7.651e+07
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` 8.984e+07
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 8.144e+07
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 2.057e+08
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` 1.682e+08
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 1.344e+08
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` 1.177e+08
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` 1.199e+08
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` 1.097e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 3.913e+08
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` 1.499e+08
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` 1.314e+08
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` 1.194e+08
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 1.138e+08
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` 1.037e+08
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` 2.856e+08
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` NA
## `startprice.diff:biddable` 4.794e+04
## `cellular.fctr1:carrier.fctrNone` NA
## `cellular.fctrUnknown:carrier.fctrNone` NA
## `cellular.fctr1:carrier.fctrOther` NA
## `cellular.fctrUnknown:carrier.fctrOther` NA
## `cellular.fctr1:carrier.fctrSprint` NA
## `cellular.fctrUnknown:carrier.fctrSprint` NA
## `cellular.fctr1:carrier.fctrT-Mobile` NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile` NA
## `cellular.fctr1:carrier.fctrUnknown` NA
## `cellular.fctrUnknown:carrier.fctrUnknown` NA
## `cellular.fctr1:carrier.fctrVerizon` NA
## `cellular.fctrUnknown:carrier.fctrVerizon` NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 3.406e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` 2.792e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 2.819e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 2.135e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 2.140e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 5.006e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 3.477e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 4.614e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 3.149e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` 2.990e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 2.879e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` 2.876e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 5.071e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 3.837e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 6.170e+07
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 2.921e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 3.284e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` 2.787e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` 2.824e+07
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` 9.589e+07
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 3.906e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 4.293e+07
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 3.479e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.490e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 1.062e+08
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 3.897e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` 3.275e+07
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 3.788e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 3.502e+07
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` 5.025e+07
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` 5.232e+07
## z value
## (Intercept) -22794359
## D.ratio.nstopwrds.nwrds -45647681
## D.terms.n.stem.stop.Ratio 22763438
## D.npnct01.log -4292132
## .rnorm -9642836
## storage.fctr16 -8965489
## storage.fctr32 -4602237
## storage.fctr64 20428806
## storage.fctrUnknown -6038845
## D.npnct11.log 21101189
## D.npnct10.log -24148586
## D.TfIdf.sum.post.stop 19920661
## D.TfIdf.sum.post.stem -15288767
## D.sum.TfIdf NA
## `prdl.my.descr.fctrUnknown#1` -22712649
## `prdl.my.descr.fctriPad 1#0` -11999998
## `prdl.my.descr.fctriPad 1#1` 14250000
## `prdl.my.descr.fctriPad 2#0` 32117430
## `prdl.my.descr.fctriPad 2#1` 33455148
## `prdl.my.descr.fctriPad 3+#0` 19367533
## `prdl.my.descr.fctriPad 3+#1` 1792204
## `prdl.my.descr.fctriPadAir#0` -2896485
## `prdl.my.descr.fctriPadAir#1` 2103347
## `prdl.my.descr.fctriPadmini 2+#0` -13805954
## `prdl.my.descr.fctriPadmini 2+#1` 12563668
## `prdl.my.descr.fctriPadmini#0` 19268065
## `prdl.my.descr.fctriPadmini#1` -7738752
## D.npnct13.log -22216331
## color.fctrGold -19767596
## `color.fctrSpace Gray` -60635936
## color.fctrUnknown -44825118
## color.fctrWhite -37477072
## D.npnct08.log -20958642
## D.npnct16.log 7991997
## D.npnct24.log NA
## D.nstopwrds.log 37126505
## D.npnct06.log -24085489
## D.npnct28.log 16595802
## D.nuppr.log 9363991
## D.npnct12.log -20471630
## D.npnct09.log -36787965
## D.ndgts.log 14177512
## D.nwrds.unq.log -22717560
## D.terms.n.post.stem.log NA
## D.terms.n.post.stop.log 22667939
## D.npnct14.log -4341800
## D.terms.n.post.stem 22132793
## D.terms.n.post.stop -21693373
## D.npnct05.log -47474833
## `condition.fctrFor parts or not working` -7291576
## `condition.fctrManufacturer refurbished` 5920619
## condition.fctrNew -15180688
## `condition.fctrNew other (see details)` 26946403
## `condition.fctrSeller refurbished` -34177168
## idseq.my -29436240
## D.ratio.sum.TfIdf.nwrds 18979532
## D.TfIdf.sum.stem.stop.Ratio 17854462
## D.npnct15.log -8633413
## D.npnct03.log 1026929
## D.nwrds.log 8597257
## D.nchrs.log -10292175
## startprice.diff -124676756
## biddable 199579657
## cellular.fctr1 -13739369
## cellular.fctrUnknown -1917584
## carrier.fctrNone NA
## carrier.fctrOther 53751445
## carrier.fctrSprint 18442313
## `carrier.fctrT-Mobile` -3060319
## carrier.fctrUnknown -9978042
## carrier.fctrVerizon 29108788
## `prdl.my.descr.fctrUnknown#1:idseq.my` 57175266
## `prdl.my.descr.fctriPad 1#0:idseq.my` 24373715
## `prdl.my.descr.fctriPad 1#1:idseq.my` 31414124
## `prdl.my.descr.fctriPad 2#0:idseq.my` 7095278
## `prdl.my.descr.fctriPad 2#1:idseq.my` 5445148
## `prdl.my.descr.fctriPad 3+#0:idseq.my` 14221391
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 25450135
## `prdl.my.descr.fctriPadAir#0:idseq.my` 24159017
## `prdl.my.descr.fctriPadAir#1:idseq.my` 41074047
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 15721200
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 3603156
## `prdl.my.descr.fctriPadmini#0:idseq.my` 9433712
## `prdl.my.descr.fctriPadmini#1:idseq.my` 9284120
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` -25963910
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` -11543780
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` -47160319
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` -50684651
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` -32178024
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` -8962002
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 26287065
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` -4269560
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` -28476869
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` -2135938
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 275045
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` 41359651
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 15898875
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 6141172
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` 11231896
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` -18934461
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` -21436269
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` -18960300
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 19422530
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 63668025
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` 15373709
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 24935480
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` -1082084
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` -27415165
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` -14367260
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 57339679
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` -15212124
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` -26005271
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` -9671646
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 16827353
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` 5488661
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` -73082396
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` NA
## `startprice.diff:biddable` -245898336
## `cellular.fctr1:carrier.fctrNone` NA
## `cellular.fctrUnknown:carrier.fctrNone` NA
## `cellular.fctr1:carrier.fctrOther` NA
## `cellular.fctrUnknown:carrier.fctrOther` NA
## `cellular.fctr1:carrier.fctrSprint` NA
## `cellular.fctrUnknown:carrier.fctrSprint` NA
## `cellular.fctr1:carrier.fctrT-Mobile` NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile` NA
## `cellular.fctr1:carrier.fctrUnknown` NA
## `cellular.fctrUnknown:carrier.fctrUnknown` NA
## `cellular.fctr1:carrier.fctrVerizon` NA
## `cellular.fctrUnknown:carrier.fctrVerizon` NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 2797477
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -22001461
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 29366267
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 982334
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` -11507771
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 20686015
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 15999046
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` -3102301
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` -1549282
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -4043321
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 8894908
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -21051291
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -49026094
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 34876349
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 46793175
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 4838038
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 32687376
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -46554561
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -32477580
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -50629819
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 73626109
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 72459654
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 60008963
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 61367816
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` -24803447
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 35894709
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -39137190
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` -3297381
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 46467723
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -2599850
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` 10534715
## Pr(>|z|)
## (Intercept) <2e-16 ***
## D.ratio.nstopwrds.nwrds <2e-16 ***
## D.terms.n.stem.stop.Ratio <2e-16 ***
## D.npnct01.log <2e-16 ***
## .rnorm <2e-16 ***
## storage.fctr16 <2e-16 ***
## storage.fctr32 <2e-16 ***
## storage.fctr64 <2e-16 ***
## storage.fctrUnknown <2e-16 ***
## D.npnct11.log <2e-16 ***
## D.npnct10.log <2e-16 ***
## D.TfIdf.sum.post.stop <2e-16 ***
## D.TfIdf.sum.post.stem <2e-16 ***
## D.sum.TfIdf NA
## `prdl.my.descr.fctrUnknown#1` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0` <2e-16 ***
## `prdl.my.descr.fctriPad 1#1` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0` <2e-16 ***
## `prdl.my.descr.fctriPad 2#1` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#1` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0` <2e-16 ***
## `prdl.my.descr.fctriPadAir#1` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#1` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0` <2e-16 ***
## `prdl.my.descr.fctriPadmini#1` <2e-16 ***
## D.npnct13.log <2e-16 ***
## color.fctrGold <2e-16 ***
## `color.fctrSpace Gray` <2e-16 ***
## color.fctrUnknown <2e-16 ***
## color.fctrWhite <2e-16 ***
## D.npnct08.log <2e-16 ***
## D.npnct16.log <2e-16 ***
## D.npnct24.log NA
## D.nstopwrds.log <2e-16 ***
## D.npnct06.log <2e-16 ***
## D.npnct28.log <2e-16 ***
## D.nuppr.log <2e-16 ***
## D.npnct12.log <2e-16 ***
## D.npnct09.log <2e-16 ***
## D.ndgts.log <2e-16 ***
## D.nwrds.unq.log <2e-16 ***
## D.terms.n.post.stem.log NA
## D.terms.n.post.stop.log <2e-16 ***
## D.npnct14.log <2e-16 ***
## D.terms.n.post.stem <2e-16 ***
## D.terms.n.post.stop <2e-16 ***
## D.npnct05.log <2e-16 ***
## `condition.fctrFor parts or not working` <2e-16 ***
## `condition.fctrManufacturer refurbished` <2e-16 ***
## condition.fctrNew <2e-16 ***
## `condition.fctrNew other (see details)` <2e-16 ***
## `condition.fctrSeller refurbished` <2e-16 ***
## idseq.my <2e-16 ***
## D.ratio.sum.TfIdf.nwrds <2e-16 ***
## D.TfIdf.sum.stem.stop.Ratio <2e-16 ***
## D.npnct15.log <2e-16 ***
## D.npnct03.log <2e-16 ***
## D.nwrds.log <2e-16 ***
## D.nchrs.log <2e-16 ***
## startprice.diff <2e-16 ***
## biddable <2e-16 ***
## cellular.fctr1 <2e-16 ***
## cellular.fctrUnknown <2e-16 ***
## carrier.fctrNone NA
## carrier.fctrOther <2e-16 ***
## carrier.fctrSprint <2e-16 ***
## `carrier.fctrT-Mobile` <2e-16 ***
## carrier.fctrUnknown <2e-16 ***
## carrier.fctrVerizon <2e-16 ***
## `prdl.my.descr.fctrUnknown#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 1#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 2#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadAir#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:idseq.my` <2e-16 ***
## `prdl.my.descr.fctriPadmini#1:idseq.my` <2e-16 ***
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` NA
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` NA
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` NA
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` NA
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` NA
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` NA
## `startprice.diff:biddable` <2e-16 ***
## `cellular.fctr1:carrier.fctrNone` NA
## `cellular.fctrUnknown:carrier.fctrNone` NA
## `cellular.fctr1:carrier.fctrOther` NA
## `cellular.fctrUnknown:carrier.fctrOther` NA
## `cellular.fctr1:carrier.fctrSprint` NA
## `cellular.fctrUnknown:carrier.fctrSprint` NA
## `cellular.fctr1:carrier.fctrT-Mobile` NA
## `cellular.fctrUnknown:carrier.fctrT-Mobile` NA
## `cellular.fctr1:carrier.fctrUnknown` NA
## `cellular.fctrUnknown:carrier.fctrUnknown` NA
## `cellular.fctr1:carrier.fctrVerizon` NA
## `cellular.fctrUnknown:carrier.fctrVerizon` NA
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` <2e-16 ***
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` <2e-16 ***
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` <2e-16 ***
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` <2e-16 ***
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` <2e-16 ***
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` NA
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.5 on 968 degrees of freedom
## Residual deviance: 14705.8 on 825 degrees of freedom
## AIC: 14994
##
## Number of Fisher Scoring iterations: 25
##
## [1] " calling mypredict_mdl for fit:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7926829
## 3 0.2 0.7926829
## 4 0.3 0.7926829
## 5 0.4 0.7926829
## 6 0.5 0.7926829
## 7 0.6 0.7926829
## 8 0.7 0.7926829
## 9 0.8 0.7926829
## 10 0.9 0.7926829
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.glm.N
## 1 N 375
## 2 Y 57
## sold.fctr.predict.All.Interact.X.glm.Y
## 1 147
## 2 390
## Prediction
## Reference N Y
## N 375 147
## Y 57 390
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 7.894737e-01 5.824493e-01 7.624276e-01 8.147476e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.491405e-59 4.627387e-10
## [1] " calling mypredict_mdl for OOB:"
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6898803
## 3 0.2 0.6898803
## 4 0.3 0.6898803
## 5 0.4 0.6898803
## 6 0.5 0.6898803
## 7 0.6 0.6898803
## 8 0.7 0.6898803
## 9 0.8 0.6898803
## 10 0.9 0.6898803
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.Interact.X.glm.N
## 1 N 288
## 2 Y 96
## sold.fctr.predict.All.Interact.X.glm.Y
## 1 189
## 2 317
## Prediction
## Reference N Y
## N 288 189
## Y 96 317
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 6.797753e-01 3.658021e-01 6.480036e-01 7.103515e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.782162e-18 5.048049e-08
## model_id model_method
## 1 All.Interact.X.glm glm
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 5.369 2.009
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.795437 0.9 0.7926829 0.747162
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.7624276 0.8147476 0.488484 0.685664
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.6898803 0.6797753
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.6480036 0.7103515 0.3658021 14993.81
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01396053 0.02568968
## label step_major step_minor bgn end elapsed
## 7 fit.models_1_glm 7 0 148.261 157.327 9.066
## 8 fit.models_1_bayesglm 8 0 157.328 NA NA
## [1] "fitting model: All.Interact.X.bayesglm"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Fitting final model on full training set
##
## Call:
## NULL
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -3.3587 -0.5994 -0.1017 0.3551 2.6169
##
## Coefficients:
## Estimate
## (Intercept) -2.653e+00
## D.ratio.nstopwrds.nwrds -2.167e+00
## D.terms.n.stem.stop.Ratio 4.753e+00
## D.npnct01.log -2.386e-01
## .rnorm 1.500e-02
## storage.fctr16 -6.798e-02
## storage.fctr32 1.137e-01
## storage.fctr64 6.562e-01
## storage.fctrUnknown 3.281e-01
## D.npnct11.log -7.325e-02
## D.npnct10.log -1.269e+00
## D.TfIdf.sum.post.stop 1.046e-01
## D.TfIdf.sum.post.stem 1.051e-01
## D.sum.TfIdf 1.051e-01
## `prdl.my.descr.fctrUnknown#1` 7.537e-02
## `prdl.my.descr.fctriPad 1#0` -2.070e-01
## `prdl.my.descr.fctriPad 1#1` -4.760e-01
## `prdl.my.descr.fctriPad 2#0` 1.083e+00
## `prdl.my.descr.fctriPad 2#1` 5.819e-01
## `prdl.my.descr.fctriPad 3+#0` 5.832e-01
## `prdl.my.descr.fctriPad 3+#1` -6.557e-02
## `prdl.my.descr.fctriPadAir#0` 3.586e-02
## `prdl.my.descr.fctriPadAir#1` 2.609e-01
## `prdl.my.descr.fctriPadmini 2+#0` -2.463e-01
## `prdl.my.descr.fctriPadmini 2+#1` 1.483e-01
## `prdl.my.descr.fctriPadmini#0` 1.832e-01
## `prdl.my.descr.fctriPadmini#1` -3.643e-01
## D.npnct13.log -3.907e-01
## color.fctrGold -8.959e-02
## `color.fctrSpace Gray` -3.180e-01
## color.fctrUnknown -3.474e-01
## color.fctrWhite -8.177e-02
## D.npnct08.log -1.418e-01
## D.npnct16.log 1.565e+00
## D.npnct24.log 2.079e-01
## D.nstopwrds.log 2.343e-01
## D.npnct06.log -3.352e+00
## D.npnct28.log -3.704e-02
## D.nuppr.log -1.057e-01
## D.npnct12.log -1.324e+00
## D.npnct09.log -2.066e+00
## D.ndgts.log 4.877e-01
## D.nwrds.unq.log -2.573e-01
## D.terms.n.post.stem.log -2.573e-01
## D.terms.n.post.stop.log -2.581e-01
## D.npnct14.log -7.400e-01
## D.terms.n.post.stem -6.054e-02
## D.terms.n.post.stop -6.215e-02
## D.npnct05.log -2.834e+00
## `condition.fctrFor parts or not working` 1.488e-01
## `condition.fctrManufacturer refurbished` 3.869e-01
## condition.fctrNew -3.767e-01
## `condition.fctrNew other (see details)` 5.646e-01
## `condition.fctrSeller refurbished` -4.871e-01
## idseq.my -2.957e-04
## D.ratio.sum.TfIdf.nwrds -6.310e-01
## D.TfIdf.sum.stem.stop.Ratio -1.055e+00
## D.npnct15.log 2.708e+00
## D.npnct03.log 2.293e+00
## D.nwrds.log -3.197e-02
## D.nchrs.log -5.459e-02
## startprice.diff -5.199e-03
## biddable 4.116e+00
## cellular.fctr1 -4.187e-02
## cellular.fctrUnknown -9.672e-02
## carrier.fctrNone 1.249e-01
## carrier.fctrOther 6.708e-01
## carrier.fctrSprint 1.650e-01
## `carrier.fctrT-Mobile` -2.979e-01
## carrier.fctrUnknown -1.697e-01
## carrier.fctrVerizon 2.657e-01
## `prdl.my.descr.fctrUnknown#1:idseq.my` 1.470e-03
## `prdl.my.descr.fctriPad 1#0:idseq.my` 7.129e-04
## `prdl.my.descr.fctriPad 1#1:idseq.my` 5.414e-04
## `prdl.my.descr.fctriPad 2#0:idseq.my` -8.161e-04
## `prdl.my.descr.fctriPad 2#1:idseq.my` 7.509e-05
## `prdl.my.descr.fctriPad 3+#0:idseq.my` -1.910e-04
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 5.453e-04
## `prdl.my.descr.fctriPadAir#0:idseq.my` 2.623e-04
## `prdl.my.descr.fctriPadAir#1:idseq.my` 5.057e-04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 7.326e-04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 7.196e-04
## `prdl.my.descr.fctriPadmini#0:idseq.my` 5.693e-05
## `prdl.my.descr.fctriPadmini#1:idseq.my` -8.027e-05
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` -6.718e-01
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` 1.531e+00
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` -1.559e+00
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` -7.664e-01
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` -7.247e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` -2.776e+00
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` 9.971e-01
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 1.237e+00
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` -2.070e-01
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` -1.298e+00
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` 1.083e+00
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` 1.266e-01
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` 5.832e-01
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` -3.667e-03
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` 3.586e-02
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 1.157e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` -2.463e-01
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` -1.472e+00
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` 1.832e-01
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` -2.060e-01
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 3.900e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 2.195e-01
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` -2.244e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` 5.343e-02
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` -3.742e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` -5.586e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` -1.920e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 1.119e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 5.254e-01
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` -1.921e+00
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` -4.809e-01
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 5.513e-01
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` -1.688e-01
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` -1.856e-01
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` -1.956e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 8.057e-01
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` 2.046e-01
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` -2.509e-01
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` -5.761e-02
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` 1.331e-01
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 4.468e-02
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` -1.128e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` 1.140e-01
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` -1.484e-01
## `startprice.diff:biddable` -3.383e-02
## `cellular.fctr1:carrier.fctrNone` 0.000e+00
## `cellular.fctrUnknown:carrier.fctrNone` 0.000e+00
## `cellular.fctr1:carrier.fctrOther` 6.708e-01
## `cellular.fctrUnknown:carrier.fctrOther` 0.000e+00
## `cellular.fctr1:carrier.fctrSprint` 1.650e-01
## `cellular.fctrUnknown:carrier.fctrSprint` 0.000e+00
## `cellular.fctr1:carrier.fctrT-Mobile` -2.979e-01
## `cellular.fctrUnknown:carrier.fctrT-Mobile` 0.000e+00
## `cellular.fctr1:carrier.fctrUnknown` -8.627e-02
## `cellular.fctrUnknown:carrier.fctrUnknown` -9.672e-02
## `cellular.fctr1:carrier.fctrVerizon` 2.657e-01
## `cellular.fctrUnknown:carrier.fctrVerizon` 0.000e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 5.553e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -3.964e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 1.845e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 6.181e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 1.487e-02
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` -2.392e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 1.487e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 4.087e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` -3.567e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -2.653e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 1.722e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -4.137e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -9.796e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 8.492e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.353e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -1.805e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 9.363e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -1.524e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -1.162e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -3.322e-01
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 2.879e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 9.492e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 1.138e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.090e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 9.379e-02
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.415e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -9.311e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 3.186e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 2.961e-01
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -7.718e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 0.000e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -2.535e-01
## Std. Error
## (Intercept) 8.232e+00
## D.ratio.nstopwrds.nwrds 2.607e+00
## D.terms.n.stem.stop.Ratio 6.670e+00
## D.npnct01.log 8.154e-01
## .rnorm 1.032e-01
## storage.fctr16 4.882e-01
## storage.fctr32 5.223e-01
## storage.fctr64 5.132e-01
## storage.fctrUnknown 6.475e-01
## D.npnct11.log 4.297e-01
## D.npnct10.log 1.820e+00
## D.TfIdf.sum.post.stop 3.031e-01
## D.TfIdf.sum.post.stem 3.171e-01
## D.sum.TfIdf 3.171e-01
## `prdl.my.descr.fctrUnknown#1` 2.310e+00
## `prdl.my.descr.fctriPad 1#0` 1.540e+00
## `prdl.my.descr.fctriPad 1#1` 2.311e+00
## `prdl.my.descr.fctriPad 2#0` 1.817e+00
## `prdl.my.descr.fctriPad 2#1` 2.272e+00
## `prdl.my.descr.fctriPad 3+#0` 1.566e+00
## `prdl.my.descr.fctriPad 3+#1` 2.205e+00
## `prdl.my.descr.fctriPadAir#0` 1.527e+00
## `prdl.my.descr.fctriPadAir#1` 2.228e+00
## `prdl.my.descr.fctriPadmini 2+#0` 1.541e+00
## `prdl.my.descr.fctriPadmini 2+#1` 2.430e+00
## `prdl.my.descr.fctriPadmini#0` 1.534e+00
## `prdl.my.descr.fctriPadmini#1` 2.305e+00
## D.npnct13.log 4.233e-01
## color.fctrGold 5.366e-01
## `color.fctrSpace Gray` 3.978e-01
## color.fctrUnknown 2.965e-01
## color.fctrWhite 3.167e-01
## D.npnct08.log 8.697e-01
## D.npnct16.log 2.176e+00
## D.npnct24.log 2.892e+00
## D.nstopwrds.log 7.274e-01
## D.npnct06.log 2.328e+00
## D.npnct28.log 2.221e+00
## D.nuppr.log 5.276e-01
## D.npnct12.log 9.530e-01
## D.npnct09.log 6.909e+00
## D.ndgts.log 4.726e-01
## D.nwrds.unq.log 1.050e+00
## D.terms.n.post.stem.log 1.050e+00
## D.terms.n.post.stop.log 1.047e+00
## D.npnct14.log 9.969e-01
## D.terms.n.post.stem 2.012e-01
## D.terms.n.post.stop 1.990e-01
## D.npnct05.log 1.651e+00
## `condition.fctrFor parts or not working` 4.368e-01
## `condition.fctrManufacturer refurbished` 5.551e-01
## condition.fctrNew 3.257e-01
## `condition.fctrNew other (see details)` 4.835e-01
## `condition.fctrSeller refurbished` 5.210e-01
## idseq.my 5.243e-04
## D.ratio.sum.TfIdf.nwrds 1.319e+00
## D.TfIdf.sum.stem.stop.Ratio 5.258e+00
## D.npnct15.log 5.775e+00
## D.npnct03.log 2.692e+00
## D.nwrds.log 8.200e-01
## D.nchrs.log 5.117e-01
## startprice.diff 1.586e-03
## biddable 3.079e-01
## cellular.fctr1 1.316e+00
## cellular.fctrUnknown 1.751e+00
## carrier.fctrNone 1.316e+00
## carrier.fctrOther 2.079e+00
## carrier.fctrSprint 1.523e+00
## `carrier.fctrT-Mobile` 1.561e+00
## carrier.fctrUnknown 1.332e+00
## carrier.fctrVerizon 1.478e+00
## `prdl.my.descr.fctrUnknown#1:idseq.my` 1.337e-03
## `prdl.my.descr.fctriPad 1#0:idseq.my` 8.322e-04
## `prdl.my.descr.fctriPad 1#1:idseq.my` 9.228e-04
## `prdl.my.descr.fctriPad 2#0:idseq.my` 1.267e-03
## `prdl.my.descr.fctriPad 2#1:idseq.my` 8.597e-04
## `prdl.my.descr.fctriPad 3+#0:idseq.my` 7.783e-04
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 7.792e-04
## `prdl.my.descr.fctriPadAir#0:idseq.my` 7.210e-04
## `prdl.my.descr.fctriPadAir#1:idseq.my` 8.250e-04
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 7.506e-04
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 1.855e-03
## `prdl.my.descr.fctriPadmini#0:idseq.my` 7.260e-04
## `prdl.my.descr.fctriPadmini#1:idseq.my` 1.007e-03
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` 1.636e+00
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` 1.981e+00
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` 1.637e+00
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` 1.637e+00
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` 1.566e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` 3.884e+00
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` 2.003e+00
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 4.194e+00
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` 1.540e+00
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` 3.829e+00
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` 1.817e+00
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` 3.330e+00
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` 1.566e+00
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` 3.107e+00
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` 1.527e+00
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 3.302e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` 1.541e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` 6.760e+00
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` 1.534e+00
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` 3.890e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 9.576e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 3.330e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` 5.879e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` 3.493e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` 2.970e+00
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` 3.083e+00
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` 2.817e+00
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 2.427e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 1.990e+00
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` 3.390e+00
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` 1.650e+00
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 1.365e+00
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` 1.147e+00
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` 1.098e+00
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` 1.130e+00
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 2.338e+00
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` 1.377e+00
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` 1.141e+00
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` 9.179e-01
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` 8.052e-01
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 7.224e-01
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` 7.662e-01
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` 1.576e+00
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` 9.482e-01
## `startprice.diff:biddable` 4.868e-03
## `cellular.fctr1:carrier.fctrNone` 2.500e+00
## `cellular.fctrUnknown:carrier.fctrNone` 2.500e+00
## `cellular.fctr1:carrier.fctrOther` 2.079e+00
## `cellular.fctrUnknown:carrier.fctrOther` 2.500e+00
## `cellular.fctr1:carrier.fctrSprint` 1.523e+00
## `cellular.fctrUnknown:carrier.fctrSprint` 2.500e+00
## `cellular.fctr1:carrier.fctrT-Mobile` 1.561e+00
## `cellular.fctrUnknown:carrier.fctrT-Mobile` 2.500e+00
## `cellular.fctr1:carrier.fctrUnknown` 1.343e+00
## `cellular.fctrUnknown:carrier.fctrUnknown` 1.751e+00
## `cellular.fctr1:carrier.fctrVerizon` 1.478e+00
## `cellular.fctrUnknown:carrier.fctrVerizon` 2.500e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 9.924e-01
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` 9.688e-01
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 9.159e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 8.271e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 7.971e-01
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 1.455e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 1.138e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 1.241e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 1.154e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` 1.100e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 1.142e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` 1.098e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 1.532e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 1.133e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 1.681e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 1.201e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 9.894e-01
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` 1.606e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` 1.080e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` 2.039e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 1.838e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 1.836e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 1.141e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 8.804e-01
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 1.807e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.215e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` 1.217e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 1.385e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 1.308e+00
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` 1.768e+00
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 2.500e+00
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` 1.713e+00
## z value
## (Intercept) -0.322
## D.ratio.nstopwrds.nwrds -0.831
## D.terms.n.stem.stop.Ratio 0.713
## D.npnct01.log -0.293
## .rnorm 0.145
## storage.fctr16 -0.139
## storage.fctr32 0.218
## storage.fctr64 1.279
## storage.fctrUnknown 0.507
## D.npnct11.log -0.170
## D.npnct10.log -0.697
## D.TfIdf.sum.post.stop 0.345
## D.TfIdf.sum.post.stem 0.331
## D.sum.TfIdf 0.331
## `prdl.my.descr.fctrUnknown#1` 0.033
## `prdl.my.descr.fctriPad 1#0` -0.134
## `prdl.my.descr.fctriPad 1#1` -0.206
## `prdl.my.descr.fctriPad 2#0` 0.596
## `prdl.my.descr.fctriPad 2#1` 0.256
## `prdl.my.descr.fctriPad 3+#0` 0.372
## `prdl.my.descr.fctriPad 3+#1` -0.030
## `prdl.my.descr.fctriPadAir#0` 0.023
## `prdl.my.descr.fctriPadAir#1` 0.117
## `prdl.my.descr.fctriPadmini 2+#0` -0.160
## `prdl.my.descr.fctriPadmini 2+#1` 0.061
## `prdl.my.descr.fctriPadmini#0` 0.119
## `prdl.my.descr.fctriPadmini#1` -0.158
## D.npnct13.log -0.923
## color.fctrGold -0.167
## `color.fctrSpace Gray` -0.800
## color.fctrUnknown -1.171
## color.fctrWhite -0.258
## D.npnct08.log -0.163
## D.npnct16.log 0.719
## D.npnct24.log 0.072
## D.nstopwrds.log 0.322
## D.npnct06.log -1.440
## D.npnct28.log -0.017
## D.nuppr.log -0.200
## D.npnct12.log -1.390
## D.npnct09.log -0.299
## D.ndgts.log 1.032
## D.nwrds.unq.log -0.245
## D.terms.n.post.stem.log -0.245
## D.terms.n.post.stop.log -0.247
## D.npnct14.log -0.742
## D.terms.n.post.stem -0.301
## D.terms.n.post.stop -0.312
## D.npnct05.log -1.716
## `condition.fctrFor parts or not working` 0.341
## `condition.fctrManufacturer refurbished` 0.697
## condition.fctrNew -1.157
## `condition.fctrNew other (see details)` 1.168
## `condition.fctrSeller refurbished` -0.935
## idseq.my -0.564
## D.ratio.sum.TfIdf.nwrds -0.478
## D.TfIdf.sum.stem.stop.Ratio -0.201
## D.npnct15.log 0.469
## D.npnct03.log 0.852
## D.nwrds.log -0.039
## D.nchrs.log -0.107
## startprice.diff -3.278
## biddable 13.368
## cellular.fctr1 -0.032
## cellular.fctrUnknown -0.055
## carrier.fctrNone 0.095
## carrier.fctrOther 0.323
## carrier.fctrSprint 0.108
## `carrier.fctrT-Mobile` -0.191
## carrier.fctrUnknown -0.127
## carrier.fctrVerizon 0.180
## `prdl.my.descr.fctrUnknown#1:idseq.my` 1.100
## `prdl.my.descr.fctriPad 1#0:idseq.my` 0.857
## `prdl.my.descr.fctriPad 1#1:idseq.my` 0.587
## `prdl.my.descr.fctriPad 2#0:idseq.my` -0.644
## `prdl.my.descr.fctriPad 2#1:idseq.my` 0.087
## `prdl.my.descr.fctriPad 3+#0:idseq.my` -0.245
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 0.700
## `prdl.my.descr.fctriPadAir#0:idseq.my` 0.364
## `prdl.my.descr.fctriPadAir#1:idseq.my` 0.613
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 0.976
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 0.388
## `prdl.my.descr.fctriPadmini#0:idseq.my` 0.078
## `prdl.my.descr.fctriPadmini#1:idseq.my` -0.080
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` -0.411
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` 0.773
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` -0.952
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` -0.468
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` -0.463
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` -0.715
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` 0.000
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` 0.498
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 0.295
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` -0.134
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` -0.339
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` 0.596
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` 0.038
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` 0.372
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` -0.001
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` 0.023
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 0.351
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` -0.160
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` -0.218
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` 0.119
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` -0.053
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 0.407
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 0.066
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` -0.382
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` 0.015
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` 0.000
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` 0.000
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` -1.260
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` -1.812
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` -0.681
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 0.461
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 0.264
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` 0.000
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` -0.567
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` -0.291
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 0.404
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` -0.147
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` -0.169
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` -0.173
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 0.345
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` 0.000
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` 0.149
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` -0.220
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` -0.063
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` 0.165
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 0.062
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` -0.147
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` 0.072
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` 0.000
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` -0.157
## `startprice.diff:biddable` -6.949
## `cellular.fctr1:carrier.fctrNone` 0.000
## `cellular.fctrUnknown:carrier.fctrNone` 0.000
## `cellular.fctr1:carrier.fctrOther` 0.323
## `cellular.fctrUnknown:carrier.fctrOther` 0.000
## `cellular.fctr1:carrier.fctrSprint` 0.108
## `cellular.fctrUnknown:carrier.fctrSprint` 0.000
## `cellular.fctr1:carrier.fctrT-Mobile` -0.191
## `cellular.fctrUnknown:carrier.fctrT-Mobile` 0.000
## `cellular.fctr1:carrier.fctrUnknown` -0.064
## `cellular.fctrUnknown:carrier.fctrUnknown` -0.055
## `cellular.fctr1:carrier.fctrVerizon` 0.180
## `cellular.fctrUnknown:carrier.fctrVerizon` 0.000
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 0.559
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` -0.409
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 0.201
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 0.747
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 0.019
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` -0.164
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 0.131
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 0.329
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` -0.309
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` -0.241
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 0.151
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` -0.377
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` -0.640
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 0.749
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 0.805
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` -0.150
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 0.946
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` -0.949
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` -1.076
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` -0.163
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 1.566
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 0.517
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 0.998
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 2.374
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 0.052
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 1.164
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` -0.765
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 0.230
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 0.226
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` -0.437
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 0.000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` -0.148
## Pr(>|z|)
## (Intercept) 0.74723
## D.ratio.nstopwrds.nwrds 0.40590
## D.terms.n.stem.stop.Ratio 0.47605
## D.npnct01.log 0.76981
## .rnorm 0.88441
## storage.fctr16 0.88925
## storage.fctr32 0.82773
## storage.fctr64 0.20101
## storage.fctrUnknown 0.61238
## D.npnct11.log 0.86465
## D.npnct10.log 0.48572
## D.TfIdf.sum.post.stop 0.72999
## D.TfIdf.sum.post.stem 0.74034
## D.sum.TfIdf 0.74034
## `prdl.my.descr.fctrUnknown#1` 0.97398
## `prdl.my.descr.fctriPad 1#0` 0.89304
## `prdl.my.descr.fctriPad 1#1` 0.83680
## `prdl.my.descr.fctriPad 2#0` 0.55131
## `prdl.my.descr.fctriPad 2#1` 0.79781
## `prdl.my.descr.fctriPad 3+#0` 0.70954
## `prdl.my.descr.fctriPad 3+#1` 0.97627
## `prdl.my.descr.fctriPadAir#0` 0.98126
## `prdl.my.descr.fctriPadAir#1` 0.90678
## `prdl.my.descr.fctriPadmini 2+#0` 0.87300
## `prdl.my.descr.fctriPadmini 2+#1` 0.95133
## `prdl.my.descr.fctriPadmini#0` 0.90493
## `prdl.my.descr.fctriPadmini#1` 0.87441
## D.npnct13.log 0.35604
## color.fctrGold 0.86741
## `color.fctrSpace Gray` 0.42399
## color.fctrUnknown 0.24145
## color.fctrWhite 0.79628
## D.npnct08.log 0.87048
## D.npnct16.log 0.47192
## D.npnct24.log 0.94270
## D.nstopwrds.log 0.74740
## D.npnct06.log 0.14989
## D.npnct28.log 0.98670
## D.nuppr.log 0.84121
## D.npnct12.log 0.16463
## D.npnct09.log 0.76489
## D.ndgts.log 0.30213
## D.nwrds.unq.log 0.80647
## D.terms.n.post.stem.log 0.80647
## D.terms.n.post.stop.log 0.80527
## D.npnct14.log 0.45789
## D.terms.n.post.stem 0.76350
## D.terms.n.post.stop 0.75478
## D.npnct05.log 0.08608 .
## `condition.fctrFor parts or not working` 0.73344
## `condition.fctrManufacturer refurbished` 0.48580
## condition.fctrNew 0.24746
## `condition.fctrNew other (see details)` 0.24292
## `condition.fctrSeller refurbished` 0.34980
## idseq.my 0.57275
## D.ratio.sum.TfIdf.nwrds 0.63231
## D.TfIdf.sum.stem.stop.Ratio 0.84104
## D.npnct15.log 0.63913
## D.npnct03.log 0.39441
## D.nwrds.log 0.96890
## D.nchrs.log 0.91504
## startprice.diff 0.00105 **
## biddable < 2e-16 ***
## cellular.fctr1 0.97462
## cellular.fctrUnknown 0.95595
## carrier.fctrNone 0.92438
## carrier.fctrOther 0.74700
## carrier.fctrSprint 0.91372
## `carrier.fctrT-Mobile` 0.84870
## carrier.fctrUnknown 0.89861
## carrier.fctrVerizon 0.85734
## `prdl.my.descr.fctrUnknown#1:idseq.my` 0.27126
## `prdl.my.descr.fctriPad 1#0:idseq.my` 0.39165
## `prdl.my.descr.fctriPad 1#1:idseq.my` 0.55742
## `prdl.my.descr.fctriPad 2#0:idseq.my` 0.51966
## `prdl.my.descr.fctriPad 2#1:idseq.my` 0.93040
## `prdl.my.descr.fctriPad 3+#0:idseq.my` 0.80614
## `prdl.my.descr.fctriPad 3+#1:idseq.my` 0.48406
## `prdl.my.descr.fctriPadAir#0:idseq.my` 0.71600
## `prdl.my.descr.fctriPadAir#1:idseq.my` 0.53988
## `prdl.my.descr.fctriPadmini 2+#0:idseq.my` 0.32903
## `prdl.my.descr.fctriPadmini 2+#1:idseq.my` 0.69809
## `prdl.my.descr.fctriPadmini#0:idseq.my` 0.93750
## `prdl.my.descr.fctriPadmini#1:idseq.my` 0.93645
## `prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds` 0.68141
## `prdl.my.descr.fctriPad 1#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds` 0.43967
## `prdl.my.descr.fctriPad 2#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds` 0.34093
## `prdl.my.descr.fctriPad 3+#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds` 0.63967
## `prdl.my.descr.fctriPadAir#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds` 0.64346
## `prdl.my.descr.fctriPadmini 2+#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds` 0.47475
## `prdl.my.descr.fctriPadmini#0:D.ratio.sum.TfIdf.nwrds` 1.00000
## `prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds` 0.61871
## `prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio` 0.76809
## `prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio` 0.89304
## `prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio` 0.73458
## `prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio` 0.55131
## `prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio` 0.96967
## `prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio` 0.70954
## `prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio` 0.99906
## `prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio` 0.98126
## `prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio` 0.72596
## `prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio` 0.87300
## `prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio` 0.82758
## `prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio` 0.90493
## `prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio` 0.95777
## `prdl.my.descr.fctrUnknown#1:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPad 1#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPad 1#1:D.npnct15.log` 0.68377
## `prdl.my.descr.fctriPad 2#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPad 2#1:D.npnct15.log` 0.94744
## `prdl.my.descr.fctriPad 3+#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPad 3+#1:D.npnct15.log` 0.70271
## `prdl.my.descr.fctriPadAir#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPadAir#1:D.npnct15.log` 0.98780
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPadmini#0:D.npnct15.log` 1.00000
## `prdl.my.descr.fctriPadmini#1:D.npnct15.log` 1.00000
## `prdl.my.descr.fctrUnknown#1:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPad 1#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPad 1#1:D.npnct03.log` 0.20779
## `prdl.my.descr.fctriPad 2#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPad 2#1:D.npnct03.log` 0.06999 .
## `prdl.my.descr.fctriPad 3+#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPad 3+#1:D.npnct03.log` 0.49563
## `prdl.my.descr.fctriPadAir#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPadAir#1:D.npnct03.log` 0.64495
## `prdl.my.descr.fctriPadmini 2+#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log` 0.79182
## `prdl.my.descr.fctriPadmini#0:D.npnct03.log` 1.00000
## `prdl.my.descr.fctriPadmini#1:D.npnct03.log` 0.57090
## `prdl.my.descr.fctrUnknown#1:D.nwrds.log` 0.77078
## `prdl.my.descr.fctriPad 1#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPad 1#1:D.nwrds.log` 0.68627
## `prdl.my.descr.fctriPad 2#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPad 2#1:D.nwrds.log` 0.88300
## `prdl.my.descr.fctriPad 3+#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPad 3+#1:D.nwrds.log` 0.86580
## `prdl.my.descr.fctriPadAir#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPadAir#1:D.nwrds.log` 0.86252
## `prdl.my.descr.fctriPadmini 2+#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log` 0.73039
## `prdl.my.descr.fctriPadmini#0:D.nwrds.log` 1.00000
## `prdl.my.descr.fctriPadmini#1:D.nwrds.log` 0.88187
## `prdl.my.descr.fctrUnknown#1:D.nchrs.log` 0.82601
## `prdl.my.descr.fctriPad 1#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPad 1#1:D.nchrs.log` 0.94996
## `prdl.my.descr.fctriPad 2#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPad 2#1:D.nchrs.log` 0.86870
## `prdl.my.descr.fctriPad 3+#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPad 3+#1:D.nchrs.log` 0.95069
## `prdl.my.descr.fctriPadAir#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPadAir#1:D.nchrs.log` 0.88295
## `prdl.my.descr.fctriPadmini 2+#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log` 0.94237
## `prdl.my.descr.fctriPadmini#0:D.nchrs.log` 1.00000
## `prdl.my.descr.fctriPadmini#1:D.nchrs.log` 0.87563
## `startprice.diff:biddable` 3.68e-12 ***
## `cellular.fctr1:carrier.fctrNone` 1.00000
## `cellular.fctrUnknown:carrier.fctrNone` 1.00000
## `cellular.fctr1:carrier.fctrOther` 0.74700
## `cellular.fctrUnknown:carrier.fctrOther` 1.00000
## `cellular.fctr1:carrier.fctrSprint` 0.91372
## `cellular.fctrUnknown:carrier.fctrSprint` 1.00000
## `cellular.fctr1:carrier.fctrT-Mobile` 0.84870
## `cellular.fctrUnknown:carrier.fctrT-Mobile` 1.00000
## `cellular.fctr1:carrier.fctrUnknown` 0.94877
## `cellular.fctrUnknown:carrier.fctrUnknown` 0.95595
## `cellular.fctr1:carrier.fctrVerizon` 0.85734
## `cellular.fctrUnknown:carrier.fctrVerizon` 1.00000
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr2` 0.57583
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr2` 0.68243
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr2` 0.84031
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2` 0.45487
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr2` 0.98512
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2` 0.86940
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr2` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr2` 0.89599
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr3` 0.74184
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr3` 0.75725
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr3` 0.80950
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3` 0.88015
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr3` 0.70636
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3` 0.52247
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr3` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr3` 0.45359
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr4` 0.42093
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr4` 0.88050
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr4` 0.34396
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4` 0.34256
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr4` 0.28199
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4` 0.87061
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr4` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr4` 0.11726
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr5` 0.60521
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr5` 0.31847
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5` 0.01757 *
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr5` 0.95860
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr5` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr5` 0.24430
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr6` 0.44435
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6` 0.81802
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr6` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr6` 0.82097
## `prdl.my.descr.fctrUnknown#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctrUnknown#1:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 1#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 1#1:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 2#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 2#1:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 3+#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7` 0.66243
## `prdl.my.descr.fctriPadAir#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPadAir#1:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPadmini 2+#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPadmini#0:.clusterid.fctr7` 1.00000
## `prdl.my.descr.fctriPadmini#1:.clusterid.fctr7` 0.88236
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1337.51 on 968 degrees of freedom
## Residual deviance: 646.38 on 710 degrees of freedom
## AIC: 1164.4
##
## Number of Fisher Scoring iterations: 20
##
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.7112903
## 3 0.2 0.8026949
## 4 0.3 0.8430108
## 5 0.4 0.8505747
## 6 0.5 0.8397129
## 7 0.6 0.8296296
## 8 0.7 0.8178344
## 9 0.8 0.7911227
## 10 0.9 0.7170868
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.4000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.bayesglm.N
## 1 N 469
## 2 Y 77
## sold.fctr.predict.All.Interact.X.bayesglm.Y
## 1 53
## 2 370
## Prediction
## Reference N Y
## N 469 53
## Y 77 370
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.658411e-01 7.290199e-01 8.427522e-01 8.866791e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 3.005592e-105 4.367116e-02
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.7027027
## 3 0.2 0.7317073
## 4 0.3 0.7519819
## 5 0.4 0.7768396
## 6 0.5 0.7949367
## 7 0.6 0.7885117
## 8 0.7 0.7783784
## 9 0.8 0.7612360
## 10 0.9 0.7000000
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.Interact.X.bayesglm.N
## 1 N 414
## 2 Y 99
## sold.fctr.predict.All.Interact.X.bayesglm.Y
## 1 63
## 2 314
## Prediction
## Reference N Y
## N 414 63
## Y 99 314
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.179775e-01 6.319103e-01 7.910266e-01 8.428037e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 6.921274e-70 5.962079e-03
## model_id model_method
## 1 All.Interact.X.bayesglm bayesglm
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 1 6.633 2.293
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.9286388 0.4 0.8505747 0.7925697
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8427522 0.8866791 0.580097 0.8660362
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.7949367 0.8179775
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB min.aic.fit
## 1 0.7910266 0.8428037 0.6319103 1164.383
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.03516352 0.07034406
## label step_major step_minor bgn end elapsed
## 8 fit.models_1_bayesglm 8 0 157.328 167.609 10.281
## 9 fit.models_1_glmnet 9 0 167.610 NA NA
## [1] "fitting model: All.Interact.X.glmnet"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0559 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda
## Length Class Mode
## a0 93 -none- numeric
## beta 23994 dgCMatrix S4
## df 93 -none- numeric
## dim 2 -none- numeric
## lambda 93 -none- numeric
## dev.ratio 93 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 258 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -0.9197893866
## prdl.my.descr.fctriPad 2#0
## 0.1124207463
## prdl.my.descr.fctriPad 3+#0
## 0.0286697832
## D.terms.n.post.stem
## -0.0003862821
## D.terms.n.post.stop
## -0.0056444603
## D.npnct05.log
## -0.3320068978
## condition.fctrNew
## -0.0856349394
## startprice.diff
## -0.0026676704
## biddable
## 2.1997091789
## prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio
## 0.1087054174
## prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio
## 0.0260983081
## prdl.my.descr.fctriPad 1#1:D.npnct15.log
## 0.4788520325
## startprice.diff:biddable
## -0.0077048034
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## 3.505430e+00
## D.ratio.nstopwrds.nwrds
## -7.485809e+00
## D.terms.n.stem.stop.Ratio
## 3.890518e+00
## D.npnct01.log
## -4.947610e-01
## .rnorm
## 1.444546e-02
## storage.fctr16
## -1.822934e-02
## storage.fctr32
## 2.306846e-01
## storage.fctr64
## 7.253402e-01
## storage.fctrUnknown
## 3.418000e-01
## D.npnct11.log
## -4.513656e-02
## D.npnct10.log
## -1.059943e+00
## D.TfIdf.sum.post.stop
## 1.508231e-01
## D.TfIdf.sum.post.stem
## 1.454770e-01
## D.sum.TfIdf
## 1.118016e-01
## prdl.my.descr.fctrUnknown#1
## -1.146050e-01
## prdl.my.descr.fctriPad 1#0
## -2.712859e-01
## prdl.my.descr.fctriPad 1#1
## -6.344868e+00
## prdl.my.descr.fctriPad 2#0
## 1.446729e+00
## prdl.my.descr.fctriPad 2#1
## 6.829306e+00
## prdl.my.descr.fctriPad 3+#0
## 5.985366e-01
## prdl.my.descr.fctriPad 3+#1
## -3.607925e-01
## prdl.my.descr.fctriPadAir#0
## 3.721328e-02
## prdl.my.descr.fctriPadAir#1
## 8.712179e-01
## prdl.my.descr.fctriPadmini 2+#0
## -2.330970e-01
## prdl.my.descr.fctriPadmini 2+#1
## 3.464816e+00
## prdl.my.descr.fctriPadmini#0
## 1.511328e-01
## prdl.my.descr.fctriPadmini#1
## -4.872842e+00
## D.npnct13.log
## -5.743303e-01
## color.fctrGold
## -1.754049e-01
## color.fctrSpace Gray
## -3.258811e-01
## color.fctrUnknown
## -3.811892e-01
## color.fctrWhite
## -8.173930e-02
## D.npnct08.log
## -1.620891e-01
## D.npnct16.log
## 2.466960e+00
## D.nstopwrds.log
## 1.695590e+00
## D.npnct06.log
## -4.930549e+00
## D.nuppr.log
## -4.584746e-01
## D.npnct12.log
## -1.209235e+00
## D.npnct09.log
## -2.675262e+00
## D.ndgts.log
## 3.918722e-01
## D.nwrds.unq.log
## -8.412111e-01
## D.terms.n.post.stem.log
## -6.302781e-01
## D.terms.n.post.stop.log
## -4.619317e-01
## D.npnct14.log
## -5.909816e-01
## D.terms.n.post.stem
## -1.921138e-03
## D.terms.n.post.stop
## -5.057612e-02
## D.npnct05.log
## -3.556868e+00
## condition.fctrFor parts or not working
## 1.944876e-01
## condition.fctrManufacturer refurbished
## 3.955795e-01
## condition.fctrNew
## -3.672162e-01
## condition.fctrNew other (see details)
## 6.057331e-01
## condition.fctrSeller refurbished
## -5.083360e-01
## idseq.my
## -3.522800e-04
## D.ratio.sum.TfIdf.nwrds
## -9.770894e-01
## D.TfIdf.sum.stem.stop.Ratio
## -1.139681e+00
## D.npnct15.log
## 4.670504e-01
## D.nwrds.log
## -4.159284e-01
## D.nchrs.log
## -1.421896e-01
## startprice.diff
## -4.904189e-03
## biddable
## 4.268450e+00
## cellular.fctrUnknown
## -4.562886e-05
## carrier.fctrNone
## 1.951305e-01
## carrier.fctrOther
## 4.731453e+00
## carrier.fctrSprint
## 2.250031e-01
## carrier.fctrT-Mobile
## -4.696237e-01
## carrier.fctrUnknown
## -2.365238e-01
## carrier.fctrVerizon
## 1.912790e-01
## prdl.my.descr.fctrUnknown#1:idseq.my
## 1.450785e-03
## prdl.my.descr.fctriPad 1#0:idseq.my
## 8.426993e-04
## prdl.my.descr.fctriPad 1#1:idseq.my
## 5.980170e-04
## prdl.my.descr.fctriPad 2#0:idseq.my
## -1.308800e-03
## prdl.my.descr.fctriPad 2#1:idseq.my
## 1.008940e-04
## prdl.my.descr.fctriPad 3+#0:idseq.my
## -2.091481e-04
## prdl.my.descr.fctriPad 3+#1:idseq.my
## 5.650835e-04
## prdl.my.descr.fctriPadAir#0:idseq.my
## 3.101896e-04
## prdl.my.descr.fctriPadAir#1:idseq.my
## 5.957307e-04
## prdl.my.descr.fctriPadmini 2+#0:idseq.my
## 8.384266e-04
## prdl.my.descr.fctriPadmini 2+#1:idseq.my
## 6.904061e-04
## prdl.my.descr.fctriPadmini#0:idseq.my
## 1.024183e-04
## prdl.my.descr.fctrUnknown#1:D.ratio.sum.TfIdf.nwrds
## -5.953414e-01
## prdl.my.descr.fctriPad 1#1:D.ratio.sum.TfIdf.nwrds
## 3.470455e+00
## prdl.my.descr.fctriPad 2#1:D.ratio.sum.TfIdf.nwrds
## -2.333921e+00
## prdl.my.descr.fctriPad 3+#1:D.ratio.sum.TfIdf.nwrds
## -1.156997e+00
## prdl.my.descr.fctriPadAir#1:D.ratio.sum.TfIdf.nwrds
## -1.370894e+00
## prdl.my.descr.fctriPadmini 2+#1:D.ratio.sum.TfIdf.nwrds
## -4.856173e+00
## prdl.my.descr.fctriPadmini#1:D.ratio.sum.TfIdf.nwrds
## 2.611292e+00
## prdl.my.descr.fctrUnknown#1:D.TfIdf.sum.stem.stop.Ratio
## 1.430409e+00
## prdl.my.descr.fctriPad 1#0:D.TfIdf.sum.stem.stop.Ratio
## -2.635255e-01
## prdl.my.descr.fctriPad 1#1:D.TfIdf.sum.stem.stop.Ratio
## -7.282856e-01
## prdl.my.descr.fctriPad 2#0:D.TfIdf.sum.stem.stop.Ratio
## 1.428462e+00
## prdl.my.descr.fctriPad 2#1:D.TfIdf.sum.stem.stop.Ratio
## -4.745476e+00
## prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio
## 6.248226e-01
## prdl.my.descr.fctriPad 3+#1:D.TfIdf.sum.stem.stop.Ratio
## 1.008090e+00
## prdl.my.descr.fctriPadAir#0:D.TfIdf.sum.stem.stop.Ratio
## 4.689379e-02
## prdl.my.descr.fctriPadAir#1:D.TfIdf.sum.stem.stop.Ratio
## 3.369365e+00
## prdl.my.descr.fctriPadmini 2+#0:D.TfIdf.sum.stem.stop.Ratio
## -3.230604e-01
## prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio
## -2.605974e+00
## prdl.my.descr.fctriPadmini#0:D.TfIdf.sum.stem.stop.Ratio
## 2.093205e-01
## prdl.my.descr.fctriPadmini#1:D.TfIdf.sum.stem.stop.Ratio
## -2.725530e-04
## prdl.my.descr.fctriPad 1#1:D.npnct15.log
## 7.631379e+00
## prdl.my.descr.fctriPad 2#1:D.npnct15.log
## 6.449691e+00
## prdl.my.descr.fctriPadAir#1:D.npnct15.log
## 3.450945e+00
## prdl.my.descr.fctriPad 1#1:D.npnct03.log
## -1.496541e+00
## prdl.my.descr.fctriPad 2#1:D.npnct03.log
## -3.487919e+00
## prdl.my.descr.fctriPad 3+#1:D.npnct03.log
## -5.816703e+00
## prdl.my.descr.fctriPadAir#1:D.npnct03.log
## 5.543516e+00
## prdl.my.descr.fctriPadmini 2+#1:D.npnct03.log
## 9.796775e+00
## prdl.my.descr.fctriPadmini#1:D.npnct03.log
## 7.826695e-01
## prdl.my.descr.fctrUnknown#1:D.nwrds.log
## -2.545165e-01
## prdl.my.descr.fctriPad 1#1:D.nwrds.log
## 2.056715e+00
## prdl.my.descr.fctriPad 2#1:D.nwrds.log
## -8.771649e-01
## prdl.my.descr.fctriPad 3+#1:D.nwrds.log
## -8.962667e-01
## prdl.my.descr.fctriPadAir#1:D.nwrds.log
## -9.885393e-01
## prdl.my.descr.fctriPadmini 2+#1:D.nwrds.log
## 1.897646e+00
## prdl.my.descr.fctriPadmini#1:D.nwrds.log
## 3.966039e-01
## prdl.my.descr.fctrUnknown#1:D.nchrs.log
## -6.771936e-01
## prdl.my.descr.fctriPad 1#1:D.nchrs.log
## -1.868344e-01
## prdl.my.descr.fctriPad 2#1:D.nchrs.log
## 1.315852e-02
## prdl.my.descr.fctriPad 3+#1:D.nchrs.log
## 1.185281e-01
## prdl.my.descr.fctriPadAir#1:D.nchrs.log
## -3.870701e-01
## prdl.my.descr.fctriPadmini 2+#1:D.nchrs.log
## -9.696981e-01
## startprice.diff:biddable
## -3.663213e-02
## cellular.fctr1:carrier.fctrOther
## 4.563711e+00
## cellular.fctr1:carrier.fctrSprint
## 1.189507e-01
## cellular.fctr1:carrier.fctrT-Mobile
## -2.444122e-01
## cellular.fctrUnknown:carrier.fctrUnknown
## -2.611744e-02
## cellular.fctr1:carrier.fctrVerizon
## 3.135925e-01
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr2
## 9.753772e-01
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr2
## -5.196912e-01
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr2
## 5.806539e-01
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr2
## 9.373133e-01
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr2
## -1.945250e-01
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr2
## -1.011157e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr2
## 1.756957e+00
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr3
## 8.747031e-01
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr3
## -8.707494e-02
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr3
## 4.528562e-02
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr3
## 4.783447e-01
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr3
## -7.100320e-01
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr3
## -2.575884e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr3
## 2.454934e+00
## prdl.my.descr.fctrUnknown#1:.clusterid.fctr4
## 6.315811e+00
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr4
## -3.422748e-01
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr4
## 1.544226e+00
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr4
## -5.080517e+00
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr4
## -2.005145e+00
## prdl.my.descr.fctriPadmini 2+#1:.clusterid.fctr4
## -3.958597e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr4
## 8.987295e+00
## prdl.my.descr.fctriPad 1#1:.clusterid.fctr5
## 4.350755e+00
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr5
## 2.245772e+00
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr5
## 2.446895e+00
## prdl.my.descr.fctriPadAir#1:.clusterid.fctr5
## -3.715727e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr5
## 2.909860e+00
## prdl.my.descr.fctriPad 2#1:.clusterid.fctr6
## -1.253413e+00
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr6
## 5.786273e-01
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr6
## 1.885894e+00
## prdl.my.descr.fctriPad 3+#1:.clusterid.fctr7
## -4.377331e+00
## prdl.my.descr.fctriPadmini#1:.clusterid.fctr7
## 8.571200e-01
## character(0)
## character(0)
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.631355932
## 2 0.1 0.638115632
## 3 0.2 0.667664671
## 4 0.3 0.737279335
## 5 0.4 0.794780546
## 6 0.5 0.804848485
## 7 0.6 0.806451613
## 8 0.7 0.791180285
## 9 0.8 0.466216216
## 10 0.9 0.004464286
## 11 1.0 0.000000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.glmnet.N
## 1 N 488
## 2 Y 122
## sold.fctr.predict.All.Interact.X.glmnet.Y
## 1 34
## 2 325
## Prediction
## Reference N Y
## N 488 34
## Y 122 325
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.390093e-01 6.714312e-01 8.143296e-01 8.616078e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 3.427839e-87 3.270658e-12
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.63392172
## 2 0.1 0.63783784
## 3 0.2 0.66345382
## 4 0.3 0.73029967
## 5 0.4 0.78526048
## 6 0.5 0.79636835
## 7 0.6 0.80585106
## 8 0.7 0.80497925
## 9 0.8 0.48638838
## 10 0.9 0.01438849
## 11 1.0 0.00000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.Interact.X.glmnet.N
## 1 N 441
## 2 Y 110
## sold.fctr.predict.All.Interact.X.glmnet.Y
## 1 36
## 2 303
## Prediction
## Reference N Y
## N 441 36
## Y 110 303
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.359551e-01 6.661923e-01 8.099608e-01 8.597019e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 9.313992e-80 1.526663e-09
## model_id model_method
## 1 All.Interact.X.glmnet glmnet
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 9 14.868 1.855
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8822461 0.6 0.8064516 0.8307534
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8143296 0.8616078 0.6555776 0.8742088
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.6 0.8058511 0.8359551
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8099608 0.8597019 0.6661923
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.02792105 0.05688346
## label step_major step_minor bgn end elapsed
## 9 fit.models_1_glmnet 9 0 167.610 186.666 19.057
## 10 fit.models_1_rpart 10 0 186.667 NA NA
## [1] "fitting model: All.Interact.X.no.rnorm.rpart"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00671 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 969
##
## CP nsplit rel error
## 1 0.527964206 0 1.0000000
## 2 0.134228188 1 0.4720358
## 3 0.006711409 2 0.3378076
##
## Variable importance
## startprice.diff:biddable
## 36
## biddable
## 32
## startprice.diff
## 17
## idseq.my
## 5
## prdl.my.descr.fctriPad 3+#0:idseq.my
## 2
## prdl.my.descr.fctriPad 3+#0
## 2
## prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio
## 2
## prdl.my.descr.fctriPadmini 2+#0:idseq.my
## 1
## prdl.my.descr.fctriPad 3+#1:idseq.my
## 1
## prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio
## 1
## prdl.my.descr.fctriPadmini 2+#1:idseq.my
## 1
##
## Node number 1: 969 observations, complexity param=0.5279642
## predicted class=N expected loss=0.4613003 P(node) =1
## class counts: 522 447
## probabilities: 0.539 0.461
## left son=2 (539 obs) right son=3 (430 obs)
## Primary splits:
## biddable < 0.5 to the left, improve=151.58290, (0 missing)
## startprice.diff:biddable < -0.01904429 to the right, improve=101.31790, (0 missing)
## startprice.diff < 62.89456 to the right, improve= 82.96307, (0 missing)
## idseq.my < 876.5 to the right, improve= 37.84375, (0 missing)
## condition.fctrNew < 0.5 to the right, improve= 16.22579, (0 missing)
## Surrogate splits:
## startprice.diff:biddable < 0.01885349 to the left, agree=0.822, adj=0.600, (0 split)
## idseq.my < 798 to the right, agree=0.628, adj=0.163, (0 split)
## prdl.my.descr.fctriPad 3+#0:idseq.my < 75 to the left, agree=0.589, adj=0.074, (0 split)
## prdl.my.descr.fctriPad 3+#0 < 0.5 to the left, agree=0.586, adj=0.067, (0 split)
## prdl.my.descr.fctriPad 3+#0:D.TfIdf.sum.stem.stop.Ratio < 0.5 to the left, agree=0.586, adj=0.067, (0 split)
##
## Node number 2: 539 observations
## predicted class=N expected loss=0.2115028 P(node) =0.5562436
## class counts: 425 114
## probabilities: 0.788 0.212
##
## Node number 3: 430 observations, complexity param=0.1342282
## predicted class=Y expected loss=0.2255814 P(node) =0.4437564
## class counts: 97 333
## probabilities: 0.226 0.774
## left son=6 (80 obs) right son=7 (350 obs)
## Primary splits:
## startprice.diff < 63.51092 to the right, improve=82.902920, (0 missing)
## startprice.diff:biddable < 63.51092 to the right, improve=82.902920, (0 missing)
## idseq.my < 893.5 to the right, improve=15.999440, (0 missing)
## prdl.my.descr.fctriPad 3+#1:idseq.my < 898.5 to the right, improve= 3.809785, (0 missing)
## prdl.my.descr.fctriPadmini#0:idseq.my < 878.5 to the right, improve= 3.595642, (0 missing)
## Surrogate splits:
## startprice.diff:biddable < 63.51092 to the right, agree=1.000, adj=1.000, (0 split)
## prdl.my.descr.fctriPadmini 2+#0:idseq.my < 1420 to the right, agree=0.826, adj=0.063, (0 split)
## prdl.my.descr.fctriPad 3+#1:idseq.my < 898.5 to the right, agree=0.821, adj=0.038, (0 split)
## prdl.my.descr.fctriPadmini 2+#1:idseq.my < 776 to the right, agree=0.821, adj=0.038, (0 split)
## prdl.my.descr.fctriPadmini 2+#1:D.TfIdf.sum.stem.stop.Ratio < 0.9688673 to the right, agree=0.821, adj=0.038, (0 split)
##
## Node number 6: 80 observations
## predicted class=N expected loss=0.125 P(node) =0.08255934
## class counts: 70 10
## probabilities: 0.875 0.125
##
## Node number 7: 350 observations
## predicted class=Y expected loss=0.07714286 P(node) =0.3611971
## class counts: 27 323
## probabilities: 0.077 0.923
##
## n= 969
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 969 447 N (0.53869969 0.46130031)
## 2) biddable< 0.5 539 114 N (0.78849722 0.21150278) *
## 3) biddable>=0.5 430 97 Y (0.22558140 0.77441860)
## 6) startprice.diff>=63.51092 80 10 N (0.87500000 0.12500000) *
## 7) startprice.diff< 63.51092 350 27 Y (0.07714286 0.92285714) *
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.6313559
## 3 0.2 0.6541916
## 4 0.3 0.8105395
## 5 0.4 0.8105395
## 6 0.5 0.8105395
## 7 0.6 0.8105395
## 8 0.7 0.8105395
## 9 0.8 0.8105395
## 10 0.9 0.8105395
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rpart.N
## 1 N 495
## 2 Y 124
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.Y
## 1 27
## 2 323
## Prediction
## Reference N Y
## N 495 27
## Y 124 323
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.441692e-01 6.814949e-01 8.197763e-01 8.664485e-01 5.386997e-01
## AccuracyPValue McnemarPValue
## 1.762753e-90 5.612287e-15
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.6339217
## 3 0.2 0.6633907
## 4 0.3 0.8102981
## 5 0.4 0.8102981
## 6 0.5 0.8102981
## 7 0.6 0.8102981
## 8 0.7 0.8102981
## 9 0.8 0.8102981
## 10 0.9 0.8102981
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rpart.N
## 1 N 451
## 2 Y 114
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.Y
## 1 26
## 2 299
## Prediction
## Reference N Y
## N 451 26
## Y 114 299
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.426966e-01 6.791719e-01 8.170871e-01 8.660125e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.090657e-83 1.940362e-13
## model_id model_method
## 1 All.Interact.X.no.rnorm.rpart rpart
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 2.442 0.166
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8434283 0.9 0.8105395 0.8328173
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.8197763 0.8664485 0.6623529 0.8469855
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.9 0.8102981 0.8426966
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8170871 0.8660125 0.6791719
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.008191181 0.01513425
## label step_major step_minor bgn end elapsed
## 10 fit.models_1_rpart 10 0 186.667 192.962 6.295
## 11 fit.models_1_rf 11 0 192.963 NA NA
## [1] "fitting model: All.Interact.X.no.rnorm.rf"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 257 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: mtry
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 969 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 1938 matrix numeric
## oob.times 969 -none- numeric
## classes 2 -none- character
## importance 257 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 969 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 257 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.8555024
## 3 0.2 0.9440338
## 4 0.3 0.9759825
## 5 0.4 0.9988827
## 6 0.5 1.0000000
## 7 0.6 1.0000000
## 8 0.7 0.9700461
## 9 0.8 0.8949320
## 10 0.9 0.8111702
## 11 1.0 0.4315789
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.N
## 1 N 522
## 2 Y NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.Y
## 1 NA
## 2 447
## Prediction
## Reference N Y
## N 522 0
## Y 0 447
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.000000e+00 1.000000e+00 9.962003e-01 1.000000e+00 5.386997e-01
## AccuracyPValue McnemarPValue
## 4.731267e-261 NaN
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.7623574
## 3 0.2 0.7877551
## 4 0.3 0.8017525
## 5 0.4 0.8110599
## 6 0.5 0.8206980
## 7 0.6 0.8280255
## 8 0.7 0.7956403
## 9 0.8 0.7768362
## 10 0.9 0.7170953
## 11 1.0 0.3373494
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.N
## 1 N 430
## 2 Y 88
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.Y
## 1 47
## 2 325
## Prediction
## Reference N Y
## N 430 47
## Y 88 325
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.483146e-01 6.930078e-01 8.230374e-01 8.712595e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 4.580090e-87 5.760403e-04
## model_id model_method
## 1 All.Interact.X.no.rnorm.rf rf
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 40.125 17.55
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.6 1 0.8369453
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9962003 1 0.6693933 0.9142644
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.6 0.8280255 0.8483146
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.8230374 0.8712595 0.6930078
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01866165 0.03765593
# User specified
# Ensure at least 2 vars in each regression; else varImp crashes
# sav_models_lst <- glb_models_lst; sav_models_df <- glb_models_df; sav_featsimp_df <- glb_featsimp_df
# glb_models_lst <- sav_models_lst; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df
# easier to exclude features
# require(gdata) # needed for trim
# model_id <- "";
# indep_vars_vctr <- head(subset(glb_models_df, grepl("All\\.X\\.", model_id), select=feats)
# , 1)[, "feats"]
# indep_vars_vctr <- trim(unlist(strsplit(indep_vars_vctr, "[,]")))
# indep_vars_vctr <- setdiff(indep_vars_vctr, ".rnorm")
# easier to include features
#stop("here"); sav_models_df <- glb_models_df; glb_models_df <- sav_models_df
# !_sp
# model_id <- "csm"; indep_vars_vctr <- c(NULL
# ,"prdline.my.fctr", "prdline.my.fctr:.clusterid.fctr"
# ,"prdline.my.fctr*biddable"
# #,"prdline.my.fctr*startprice.log"
# #,"prdline.my.fctr*startprice.diff"
# #,"prdline.my.fctr*idseq.my"
# ,"prdline.my.fctr*condition.fctr"
# ,"prdline.my.fctr*D.terms.n.post.stop"
# #,"prdline.my.fctr*D.terms.n.post.stem"
# ,"prdline.my.fctr*cellular.fctr"
# # ,"<feat1>:<feat2>"
# )
# for (method in glb_models_method_vctr) {
# ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# model_type=glb_model_type,
# rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
# fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
# n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
# csm_mdl_id <- paste0(model_id, ".", method)
# csm_featsimp_df <- myget_feats_importance(glb_models_lst[[paste0(model_id, ".",
# method)]]); print(head(csm_featsimp_df))
# }
###
# Ntv.1.lm <- lm(reformulate(indep_vars_vctr, glb_rsp_var), glb_trnobs_df); print(summary(Ntv.1.lm))
#csm_featsimp_df[grepl("H.npnct19.log", row.names(csm_featsimp_df)), , FALSE]
#print(sprintf("%s OOB confusion matrix & accuracy: ", csm_mdl_id)); print(t(confusionMatrix(csm_OOBobs_df[, paste0(glb_rsp_var_out, csm_mdl_id)], csm_OOBobs_df[, glb_rsp_var])$table))
#glb_models_df[, "max.Accuracy.OOB", FALSE]
#varImp(glb_models_lst[["Low.cor.X.glm"]])
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.2.glm"]])$importance)
#orderBy(~ -Overall, varImp(glb_models_lst[["All.X.3.glm"]])$importance)
#glb_feats_df[grepl("npnct28", glb_feats_df$id), ]
# User specified bivariate models
# indep_vars_vctr_lst <- list()
# for (feat in setdiff(names(glb_fitobs_df),
# union(glb_rsp_var, glb_exclude_vars_as_features)))
# indep_vars_vctr_lst[[feat]] <- feat
# User specified combinatorial models
# indep_vars_vctr_lst <- list()
# combn_mtrx <- combn(c("<feat1_name>", "<feat2_name>", "<featn_name>"),
# <num_feats_to_choose>)
# for (combn_ix in 1:ncol(combn_mtrx))
# #print(combn_mtrx[, combn_ix])
# indep_vars_vctr_lst[[combn_ix]] <- combn_mtrx[, combn_ix]
# template for myfit_mdl
# rf is hard-coded in caret to recognize only Accuracy / Kappa evaluation metrics
# only for OOB in trainControl ?
# ret_lst <- myfit_mdl_fn(model_id=paste0(model_id_pfx, ""), model_method=method,
# indep_vars_vctr=indep_vars_vctr,
# rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
# fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
# n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df,
# model_loss_mtrx=glb_model_metric_terms,
# model_summaryFunction=glb_model_metric_smmry,
# model_metric=glb_model_metric,
# model_metric_maximize=glb_model_metric_maximize)
# Simplify a model
# fit_df <- glb_fitobs_df; glb_mdl <- step(<complex>_mdl)
# Non-caret models
# rpart_area_mdl <- rpart(reformulate("Area", response=glb_rsp_var),
# data=glb_fitobs_df, #method="class",
# control=rpart.control(cp=0.12),
# parms=list(loss=glb_model_metric_terms))
# print("rpart_sel_wlm_mdl"); prp(rpart_sel_wlm_mdl)
#
# Dump the full model-comparison table accumulated so far. The printed output
# that follows lists, per model_id, the method, the feature list and the
# fit / OOB metrics (auc, f.score, Accuracy, Kappa, aic, elapsed times).
print(glb_models_df)
## model_id
## MFO.myMFO_classfr MFO.myMFO_classfr
## Random.myrandom_classfr Random.myrandom_classfr
## Max.cor.Y.cv.0.rpart Max.cor.Y.cv.0.rpart
## Max.cor.Y.cv.0.cp.0.rpart Max.cor.Y.cv.0.cp.0.rpart
## Max.cor.Y.rpart Max.cor.Y.rpart
## Max.cor.Y.glm Max.cor.Y.glm
## Interact.High.cor.Y.glm Interact.High.cor.Y.glm
## Low.cor.X.glm Low.cor.X.glm
## All.X.glm All.X.glm
## All.X.bayesglm All.X.bayesglm
## All.X.glmnet All.X.glmnet
## All.X.no.rnorm.rpart All.X.no.rnorm.rpart
## All.X.no.rnorm.rf All.X.no.rnorm.rf
## All.Interact.X.glm All.Interact.X.glm
## All.Interact.X.bayesglm All.Interact.X.bayesglm
## All.Interact.X.glmnet All.Interact.X.glmnet
## All.Interact.X.no.rnorm.rpart All.Interact.X.no.rnorm.rpart
## All.Interact.X.no.rnorm.rf All.Interact.X.no.rnorm.rf
## model_method
## MFO.myMFO_classfr myMFO_classfr
## Random.myrandom_classfr myrandom_classfr
## Max.cor.Y.cv.0.rpart rpart
## Max.cor.Y.cv.0.cp.0.rpart rpart
## Max.cor.Y.rpart rpart
## Max.cor.Y.glm glm
## Interact.High.cor.Y.glm glm
## Low.cor.X.glm glm
## All.X.glm glm
## All.X.bayesglm bayesglm
## All.X.glmnet glmnet
## All.X.no.rnorm.rpart rpart
## All.X.no.rnorm.rf rf
## All.Interact.X.glm glm
## All.Interact.X.bayesglm bayesglm
## All.Interact.X.glmnet glmnet
## All.Interact.X.no.rnorm.rpart rpart
## All.Interact.X.no.rnorm.rf rf
## feats
## MFO.myMFO_classfr .rnorm
## Random.myrandom_classfr .rnorm
## Max.cor.Y.cv.0.rpart biddable, startprice.diff
## Max.cor.Y.cv.0.cp.0.rpart biddable, startprice.diff
## Max.cor.Y.rpart biddable, startprice.diff
## Max.cor.Y.glm biddable, startprice.diff
## Interact.High.cor.Y.glm biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
## Low.cor.X.glm biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glm biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.bayesglm biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glmnet biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rpart biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rf biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glm D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.bayesglm D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glmnet D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rpart D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rf D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything
## MFO.myMFO_classfr 0 0.367
## Random.myrandom_classfr 0 0.257
## Max.cor.Y.cv.0.rpart 0 0.598
## Max.cor.Y.cv.0.cp.0.rpart 0 0.471
## Max.cor.Y.rpart 3 0.959
## Max.cor.Y.glm 1 0.955
## Interact.High.cor.Y.glm 1 0.991
## Low.cor.X.glm 1 1.741
## All.X.glm 1 2.172
## All.X.bayesglm 1 3.299
## All.X.glmnet 9 6.992
## All.X.no.rnorm.rpart 3 1.834
## All.X.no.rnorm.rf 3 19.691
## All.Interact.X.glm 1 5.369
## All.Interact.X.bayesglm 1 6.633
## All.Interact.X.glmnet 9 14.868
## All.Interact.X.no.rnorm.rpart 3 2.442
## All.Interact.X.no.rnorm.rf 3 40.125
## min.elapsedtime.final max.auc.fit
## MFO.myMFO_classfr 0.003 0.5000000
## Random.myrandom_classfr 0.002 0.4960722
## Max.cor.Y.cv.0.rpart 0.012 0.5000000
## Max.cor.Y.cv.0.cp.0.rpart 0.008 0.9238966
## Max.cor.Y.rpart 0.012 0.8434283
## Max.cor.Y.glm 0.013 0.8591461
## Interact.High.cor.Y.glm 0.015 0.8617390
## Low.cor.X.glm 0.390 0.9028388
## All.X.glm 0.506 0.9069917
## All.X.bayesglm 0.772 0.9042146
## All.X.glmnet 1.523 0.8677904
## All.X.no.rnorm.rpart 0.096 0.8434283
## All.X.no.rnorm.rf 7.227 1.0000000
## All.Interact.X.glm 2.009 0.7954370
## All.Interact.X.bayesglm 2.293 0.9286388
## All.Interact.X.glmnet 1.855 0.8822461
## All.Interact.X.no.rnorm.rpart 0.166 0.8434283
## All.Interact.X.no.rnorm.rf 17.550 1.0000000
## opt.prob.threshold.fit max.f.score.fit
## MFO.myMFO_classfr 0.5 0.0000000
## Random.myrandom_classfr 0.4 0.6313559
## Max.cor.Y.cv.0.rpart 0.5 0.0000000
## Max.cor.Y.cv.0.cp.0.rpart 0.4 0.8603352
## Max.cor.Y.rpart 0.9 0.8105395
## Max.cor.Y.glm 0.7 0.8059333
## Interact.High.cor.Y.glm 0.6 0.7908102
## Low.cor.X.glm 0.5 0.8101852
## All.X.glm 0.5 0.8258362
## All.X.bayesglm 0.5 0.8175520
## All.X.glmnet 0.6 0.8029021
## All.X.no.rnorm.rpart 0.9 0.8105395
## All.X.no.rnorm.rf 0.6 1.0000000
## All.Interact.X.glm 0.9 0.7926829
## All.Interact.X.bayesglm 0.4 0.8505747
## All.Interact.X.glmnet 0.6 0.8064516
## All.Interact.X.no.rnorm.rpart 0.9 0.8105395
## All.Interact.X.no.rnorm.rf 0.6 1.0000000
## max.Accuracy.fit max.AccuracyLower.fit
## MFO.myMFO_classfr 0.5386997 0.5067192
## Random.myrandom_classfr 0.4613003 0.4295557
## Max.cor.Y.cv.0.rpart 0.5386997 0.5067192
## Max.cor.Y.cv.0.cp.0.rpart 0.8710010 0.8482486
## Max.cor.Y.rpart 0.8276574 0.8197763
## Max.cor.Y.glm 0.7987616 0.8132413
## Interact.High.cor.Y.glm 0.7997936 0.7958716
## Low.cor.X.glm 0.7688338 0.8056321
## All.X.glm 0.7647059 0.8197763
## All.X.bayesglm 0.7770898 0.8121533
## All.X.glmnet 0.8008256 0.8067182
## All.X.no.rnorm.rpart 0.8338493 0.8197763
## All.X.no.rnorm.rf 0.8482972 0.9962003
## All.Interact.X.glm 0.7471620 0.7624276
## All.Interact.X.bayesglm 0.7925697 0.8427522
## All.Interact.X.glmnet 0.8307534 0.8143296
## All.Interact.X.no.rnorm.rpart 0.8328173 0.8197763
## All.Interact.X.no.rnorm.rf 0.8369453 0.9962003
## max.AccuracyUpper.fit max.Kappa.fit
## MFO.myMFO_classfr 0.5704443 0.0000000
## Random.myrandom_classfr 0.4932808 0.0000000
## Max.cor.Y.cv.0.rpart 0.5704443 0.0000000
## Max.cor.Y.cv.0.cp.0.rpart 0.8914697 0.7404889
## Max.cor.Y.rpart 0.8664485 0.6497643
## Max.cor.Y.glm 0.8606386 0.5929577
## Interact.High.cor.Y.glm 0.8450880 0.5943920
## Low.cor.X.glm 0.8538452 0.5344407
## All.X.glm 0.8664485 0.5250728
## All.X.bayesglm 0.8596691 0.5506703
## All.X.glmnet 0.8548166 0.5969700
## All.X.no.rnorm.rpart 0.8664485 0.6645079
## All.X.no.rnorm.rf 1.0000000 0.6925622
## All.Interact.X.glm 0.8147476 0.4884840
## All.Interact.X.bayesglm 0.8866791 0.5800970
## All.Interact.X.glmnet 0.8616078 0.6555776
## All.Interact.X.no.rnorm.rpart 0.8664485 0.6623529
## All.Interact.X.no.rnorm.rf 1.0000000 0.6693933
## max.auc.OOB opt.prob.threshold.OOB
## MFO.myMFO_classfr 0.5000000 0.5
## Random.myrandom_classfr 0.5185354 0.4
## Max.cor.Y.cv.0.rpart 0.5000000 0.5
## Max.cor.Y.cv.0.cp.0.rpart 0.8997924 0.3
## Max.cor.Y.rpart 0.8469855 0.9
## Max.cor.Y.glm 0.8659702 0.7
## Interact.High.cor.Y.glm 0.8576352 0.6
## Low.cor.X.glm 0.8382546 0.5
## All.X.glm 0.8308232 0.5
## All.X.bayesglm 0.8427064 0.5
## All.X.glmnet 0.8560007 0.7
## All.X.no.rnorm.rpart 0.8469855 0.9
## All.X.no.rnorm.rf 0.9180131 0.5
## All.Interact.X.glm 0.6856640 0.9
## All.Interact.X.bayesglm 0.8660362 0.5
## All.Interact.X.glmnet 0.8742088 0.6
## All.Interact.X.no.rnorm.rpart 0.8469855 0.9
## All.Interact.X.no.rnorm.rf 0.9142644 0.6
## max.f.score.OOB max.Accuracy.OOB
## MFO.myMFO_classfr 0.0000000 0.5359551
## Random.myrandom_classfr 0.6339217 0.4640449
## Max.cor.Y.cv.0.rpart 0.0000000 0.5359551
## Max.cor.Y.cv.0.cp.0.rpart 0.8130841 0.8202247
## Max.cor.Y.rpart 0.8102981 0.8426966
## Max.cor.Y.glm 0.8047809 0.8348315
## Interact.High.cor.Y.glm 0.7865459 0.8146067
## Low.cor.X.glm 0.7600487 0.7786517
## All.X.glm 0.7545788 0.7741573
## All.X.bayesglm 0.7641278 0.7842697
## All.X.glmnet 0.7956104 0.8325843
## All.X.no.rnorm.rpart 0.8102981 0.8426966
## All.X.no.rnorm.rf 0.8294479 0.8438202
## All.Interact.X.glm 0.6898803 0.6797753
## All.Interact.X.bayesglm 0.7949367 0.8179775
## All.Interact.X.glmnet 0.8058511 0.8359551
## All.Interact.X.no.rnorm.rpart 0.8102981 0.8426966
## All.Interact.X.no.rnorm.rf 0.8280255 0.8483146
## max.AccuracyLower.OOB max.AccuracyUpper.OOB
## MFO.myMFO_classfr 0.5025561 0.5691153
## Random.myrandom_classfr 0.4308847 0.4974439
## Max.cor.Y.cv.0.rpart 0.5025561 0.5691153
## Max.cor.Y.cv.0.cp.0.rpart 0.7933882 0.8449213
## Max.cor.Y.rpart 0.8170871 0.8660125
## Max.cor.Y.glm 0.8087745 0.8586487
## Interact.High.cor.Y.glm 0.7874870 0.8396247
## Low.cor.X.glm 0.7499158 0.8055293
## All.X.glm 0.7452413 0.8012453
## All.X.bayesglm 0.7557654 0.8108777
## All.X.glmnet 0.8064031 0.8565410
## All.X.no.rnorm.rpart 0.8170871 0.8660125
## All.X.no.rnorm.rf 0.8182763 0.8670627
## All.Interact.X.glm 0.6480036 0.7103515
## All.Interact.X.bayesglm 0.7910266 0.8428037
## All.Interact.X.glmnet 0.8099608 0.8597019
## All.Interact.X.no.rnorm.rpart 0.8170871 0.8660125
## All.Interact.X.no.rnorm.rf 0.8230374 0.8712595
## max.Kappa.OOB max.AccuracySD.fit
## MFO.myMFO_classfr 0.0000000 NA
## Random.myrandom_classfr 0.0000000 NA
## Max.cor.Y.cv.0.rpart 0.0000000 NA
## Max.cor.Y.cv.0.cp.0.rpart 0.6403332 NA
## Max.cor.Y.rpart 0.6791719 0.010872708
## Max.cor.Y.glm 0.6639612 0.038792458
## Interact.High.cor.Y.glm 0.6240496 0.037194678
## Low.cor.X.glm 0.5546405 0.019662083
## All.X.glm 0.5454499 0.029856504
## All.X.bayesglm 0.5654496 0.034475321
## All.X.glmnet 0.6580401 0.036936079
## All.X.no.rnorm.rpart 0.6791719 0.008937311
## All.X.no.rnorm.rf 0.6854548 0.008191181
## All.Interact.X.glm 0.3658021 0.013960526
## All.Interact.X.bayesglm 0.6319103 0.035163519
## All.Interact.X.glmnet 0.6661923 0.027921051
## All.Interact.X.no.rnorm.rpart 0.6791719 0.008191181
## All.Interact.X.no.rnorm.rf 0.6930078 0.018661653
## max.KappaSD.fit min.aic.fit
## MFO.myMFO_classfr NA NA
## Random.myrandom_classfr NA NA
## Max.cor.Y.cv.0.rpart NA NA
## Max.cor.Y.cv.0.cp.0.rpart NA NA
## Max.cor.Y.rpart 0.02515063 NA
## Max.cor.Y.glm 0.07822035 883.4623
## Interact.High.cor.Y.glm 0.07559072 887.8417
## Low.cor.X.glm 0.03929384 914.1270
## All.X.glm 0.05995988 931.5575
## All.X.bayesglm 0.06919184 1056.6761
## All.X.glmnet 0.07483640 NA
## All.X.no.rnorm.rpart 0.01629107 NA
## All.X.no.rnorm.rf 0.01601525 NA
## All.Interact.X.glm 0.02568968 14993.8106
## All.Interact.X.bayesglm 0.07034406 1164.3831
## All.Interact.X.glmnet 0.05688346 NA
## All.Interact.X.no.rnorm.rpart 0.01513425 NA
## All.Interact.X.no.rnorm.rf 0.03765593 NA
# Remove the last model-fit result object from the workspace.
# NOTE(review): rm() only warns if ret_lst does not exist -- every assignment
# to ret_lst visible in this part of the file is commented out; confirm it is
# set by an earlier chunk.
rm(ret_lst)
# Register the "fit.models_1_end" step in the chunk-timing table. Per the
# printed output below, the previous row (fit.models_1_rf) gets its end/elapsed
# filled in and a new open row is appended; with major.inc=TRUE step_major
# advances (11 -> 12) and step_minor is 0.
fit.models_1_chunk_df <- myadd_chunk(fit.models_1_chunk_df, "fit.models_1_end",
major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 11 fit.models_1_rf 11 0 192.963 236.646 43.683
## 12 fit.models_1_end 12 0 236.647 NA NA
# Register the next "fit.models" sub-step in the global chunk-timing table.
# Per the printed output below, major.inc=FALSE keeps step_major (7) and
# advances step_minor (1 -> 2); the previous row is closed with its elapsed time.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 11 fit.models 7 1 92.533 236.654 144.122
## 12 fit.models 7 2 236.655 NA NA
# When a custom summary function is configured (glb_model_metric_smmry is not
# NULL), re-run mypredict_mdl(..., ret_type="stats") for every model on the fit
# and OOB observations and replace the glb_model_metric columns of
# glb_models_df with the freshly computed values.
# All objects created here (stats_df, stats_mdl_df, stats_mrg_df, tmp*_models_df
# and the loop variable model_id) are script-level and stay in the workspace.
if (!is.null(glb_model_metric_smmry)) {
# Keep only the model_id column; the third positional argument (FALSE) is
# `drop`, so the result stays a one-column data frame.
stats_df <- glb_models_df[, "model_id", FALSE]
# Stack whatever mypredict_mdl returns for each model on the "fit" data.
stats_mdl_df <- data.frame()
for (model_id in stats_df$model_id) {
stats_mdl_df <- rbind(stats_mdl_df,
mypredict_mdl(glb_models_lst[[model_id]], glb_fitobs_df, glb_rsp_var,
glb_rsp_var_out, model_id, "fit",
glb_model_metric_smmry, glb_model_metric,
glb_model_metric_maximize, ret_type="stats"))
}
# Left join on the shared column(s) -- presumably model_id; verify against
# the columns mypredict_mdl returns.
stats_df <- merge(stats_df, stats_mdl_df, all.x=TRUE)
# Same accumulation for the "OOB" data.
stats_mdl_df <- data.frame()
for (model_id in stats_df$model_id) {
stats_mdl_df <- rbind(stats_mdl_df,
mypredict_mdl(glb_models_lst[[model_id]], glb_OOBobs_df, glb_rsp_var,
glb_rsp_var_out, model_id, "OOB",
glb_model_metric_smmry, glb_model_metric,
glb_model_metric_maximize, ret_type="stats"))
}
stats_df <- merge(stats_df, stats_mdl_df, all.x=TRUE)
print("Merging following data into glb_models_df:")
# First column plus every column whose name matches glb_model_metric.
# Note that glb_model_metric is used as a regular expression by grep().
print(stats_mrg_df <- stats_df[, c(1, grep(glb_model_metric, names(stats_df)))])
# Show the values currently held in glb_models_df for those same columns,
# ordered by model_id. The column names are taken from stats_df, so they must
# already exist in glb_models_df for this selection to succeed.
print(tmp_models_df <- orderBy(~model_id, glb_models_df[, c("model_id",
grep(glb_model_metric, names(stats_df), value=TRUE))]))
# Everything except the metric columns. "model_id" is listed explicitly AND is
# still part of the setdiff() result, so it is selected twice; the duplicate
# is what shows up as "model_id.1" and is dropped at the end of this block.
tmp2_models_df <- glb_models_df[, c("model_id", setdiff(names(glb_models_df),
grep(glb_model_metric, names(stats_df), value=TRUE)))]
# Attach the recomputed metric columns; sort=FALSE asks merge() not to sort
# the result on the join key.
tmp3_models_df <- merge(tmp2_models_df, stats_mrg_df, all.x=TRUE, sort=FALSE)
print(tmp3_models_df)
print(names(tmp3_models_df))
# Publish the merged table as the new glb_models_df, minus the duplicated id.
print(glb_models_df <- subset(tmp3_models_df, select=-model_id.1))
}
# Build the plotting view of glb_models_df:
#   1. drop every column whose name contains "SD", "Upper" or "Lower";
#   2. replace each "min.<x>" column by its reciprocal, stored as "inv.<x>"
#      (appended at the end), so that larger is better for every plotted
#      statistic. 1.0 / NA is NA, so all-NA columns need no special casing.
#
# Columns are selected with logical masks and exact name comparison rather than
# negative grep() indices:
#   * `df[, -grep(p, names(df))]` selects ZERO columns when nothing matches,
#     because -integer(0) is integer(0);
#   * grep(var, ...) treats the column name as a regular expression, so the
#     dots in e.g. "min.aic.fit" match any character and could remove
#     unrelated columns;
#   * drop=FALSE keeps the result a data frame even if one column remains.
plt_models_df <- glb_models_df[, !grepl("SD|Upper|Lower", names(glb_models_df)),
                               drop=FALSE]
for (var in grep("^min\\.", names(plt_models_df), value=TRUE)) {
    # Reciprocal goes under the same name with the "min." prefix swapped.
    plt_models_df[, sub("^min\\.", "inv.", var)] <- 1.0 / plt_models_df[, var]
    # Remove exactly the source column that was just inverted.
    plt_models_df <- plt_models_df[, names(plt_models_df) != var, drop=FALSE]
}
print(plt_models_df)
## model_id
## MFO.myMFO_classfr MFO.myMFO_classfr
## Random.myrandom_classfr Random.myrandom_classfr
## Max.cor.Y.cv.0.rpart Max.cor.Y.cv.0.rpart
## Max.cor.Y.cv.0.cp.0.rpart Max.cor.Y.cv.0.cp.0.rpart
## Max.cor.Y.rpart Max.cor.Y.rpart
## Max.cor.Y.glm Max.cor.Y.glm
## Interact.High.cor.Y.glm Interact.High.cor.Y.glm
## Low.cor.X.glm Low.cor.X.glm
## All.X.glm All.X.glm
## All.X.bayesglm All.X.bayesglm
## All.X.glmnet All.X.glmnet
## All.X.no.rnorm.rpart All.X.no.rnorm.rpart
## All.X.no.rnorm.rf All.X.no.rnorm.rf
## All.Interact.X.glm All.Interact.X.glm
## All.Interact.X.bayesglm All.Interact.X.bayesglm
## All.Interact.X.glmnet All.Interact.X.glmnet
## All.Interact.X.no.rnorm.rpart All.Interact.X.no.rnorm.rpart
## All.Interact.X.no.rnorm.rf All.Interact.X.no.rnorm.rf
## model_method
## MFO.myMFO_classfr myMFO_classfr
## Random.myrandom_classfr myrandom_classfr
## Max.cor.Y.cv.0.rpart rpart
## Max.cor.Y.cv.0.cp.0.rpart rpart
## Max.cor.Y.rpart rpart
## Max.cor.Y.glm glm
## Interact.High.cor.Y.glm glm
## Low.cor.X.glm glm
## All.X.glm glm
## All.X.bayesglm bayesglm
## All.X.glmnet glmnet
## All.X.no.rnorm.rpart rpart
## All.X.no.rnorm.rf rf
## All.Interact.X.glm glm
## All.Interact.X.bayesglm bayesglm
## All.Interact.X.glmnet glmnet
## All.Interact.X.no.rnorm.rpart rpart
## All.Interact.X.no.rnorm.rf rf
## feats
## MFO.myMFO_classfr .rnorm
## Random.myrandom_classfr .rnorm
## Max.cor.Y.cv.0.rpart biddable, startprice.diff
## Max.cor.Y.cv.0.cp.0.rpart biddable, startprice.diff
## Max.cor.Y.rpart biddable, startprice.diff
## Max.cor.Y.glm biddable, startprice.diff
## Interact.High.cor.Y.glm biddable, startprice.diff, biddable:D.terms.n.post.stop, biddable:D.TfIdf.sum.post.stem, biddable:D.ratio.nstopwrds.nwrds, biddable:D.npnct06.log, biddable:D.nchrs.log, biddable:D.terms.n.post.stop.log, biddable:cellular.fctr, biddable:D.nwrds.unq.log
## Low.cor.X.glm biddable, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, prdl.my.descr.fctr, color.fctr, D.npnct08.log, D.npnct06.log, D.npnct28.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glm biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.bayesglm biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.glmnet biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, .rnorm, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rpart biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.X.no.rnorm.rf biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glm D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.bayesglm D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.glmnet D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, .rnorm, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rpart D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## All.Interact.X.no.rnorm.rf D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns max.auc.fit
## MFO.myMFO_classfr 0 0.5000000
## Random.myrandom_classfr 0 0.4960722
## Max.cor.Y.cv.0.rpart 0 0.5000000
## Max.cor.Y.cv.0.cp.0.rpart 0 0.9238966
## Max.cor.Y.rpart 3 0.8434283
## Max.cor.Y.glm 1 0.8591461
## Interact.High.cor.Y.glm 1 0.8617390
## Low.cor.X.glm 1 0.9028388
## All.X.glm 1 0.9069917
## All.X.bayesglm 1 0.9042146
## All.X.glmnet 9 0.8677904
## All.X.no.rnorm.rpart 3 0.8434283
## All.X.no.rnorm.rf 3 1.0000000
## All.Interact.X.glm 1 0.7954370
## All.Interact.X.bayesglm 1 0.9286388
## All.Interact.X.glmnet 9 0.8822461
## All.Interact.X.no.rnorm.rpart 3 0.8434283
## All.Interact.X.no.rnorm.rf 3 1.0000000
## opt.prob.threshold.fit max.f.score.fit
## MFO.myMFO_classfr 0.5 0.0000000
## Random.myrandom_classfr 0.4 0.6313559
## Max.cor.Y.cv.0.rpart 0.5 0.0000000
## Max.cor.Y.cv.0.cp.0.rpart 0.4 0.8603352
## Max.cor.Y.rpart 0.9 0.8105395
## Max.cor.Y.glm 0.7 0.8059333
## Interact.High.cor.Y.glm 0.6 0.7908102
## Low.cor.X.glm 0.5 0.8101852
## All.X.glm 0.5 0.8258362
## All.X.bayesglm 0.5 0.8175520
## All.X.glmnet 0.6 0.8029021
## All.X.no.rnorm.rpart 0.9 0.8105395
## All.X.no.rnorm.rf 0.6 1.0000000
## All.Interact.X.glm 0.9 0.7926829
## All.Interact.X.bayesglm 0.4 0.8505747
## All.Interact.X.glmnet 0.6 0.8064516
## All.Interact.X.no.rnorm.rpart 0.9 0.8105395
## All.Interact.X.no.rnorm.rf 0.6 1.0000000
## max.Accuracy.fit max.Kappa.fit max.auc.OOB
## MFO.myMFO_classfr 0.5386997 0.0000000 0.5000000
## Random.myrandom_classfr 0.4613003 0.0000000 0.5185354
## Max.cor.Y.cv.0.rpart 0.5386997 0.0000000 0.5000000
## Max.cor.Y.cv.0.cp.0.rpart 0.8710010 0.7404889 0.8997924
## Max.cor.Y.rpart 0.8276574 0.6497643 0.8469855
## Max.cor.Y.glm 0.7987616 0.5929577 0.8659702
## Interact.High.cor.Y.glm 0.7997936 0.5943920 0.8576352
## Low.cor.X.glm 0.7688338 0.5344407 0.8382546
## All.X.glm 0.7647059 0.5250728 0.8308232
## All.X.bayesglm 0.7770898 0.5506703 0.8427064
## All.X.glmnet 0.8008256 0.5969700 0.8560007
## All.X.no.rnorm.rpart 0.8338493 0.6645079 0.8469855
## All.X.no.rnorm.rf 0.8482972 0.6925622 0.9180131
## All.Interact.X.glm 0.7471620 0.4884840 0.6856640
## All.Interact.X.bayesglm 0.7925697 0.5800970 0.8660362
## All.Interact.X.glmnet 0.8307534 0.6555776 0.8742088
## All.Interact.X.no.rnorm.rpart 0.8328173 0.6623529 0.8469855
## All.Interact.X.no.rnorm.rf 0.8369453 0.6693933 0.9142644
## opt.prob.threshold.OOB max.f.score.OOB
## MFO.myMFO_classfr 0.5 0.0000000
## Random.myrandom_classfr 0.4 0.6339217
## Max.cor.Y.cv.0.rpart 0.5 0.0000000
## Max.cor.Y.cv.0.cp.0.rpart 0.3 0.8130841
## Max.cor.Y.rpart 0.9 0.8102981
## Max.cor.Y.glm 0.7 0.8047809
## Interact.High.cor.Y.glm 0.6 0.7865459
## Low.cor.X.glm 0.5 0.7600487
## All.X.glm 0.5 0.7545788
## All.X.bayesglm 0.5 0.7641278
## All.X.glmnet 0.7 0.7956104
## All.X.no.rnorm.rpart 0.9 0.8102981
## All.X.no.rnorm.rf 0.5 0.8294479
## All.Interact.X.glm 0.9 0.6898803
## All.Interact.X.bayesglm 0.5 0.7949367
## All.Interact.X.glmnet 0.6 0.8058511
## All.Interact.X.no.rnorm.rpart 0.9 0.8102981
## All.Interact.X.no.rnorm.rf 0.6 0.8280255
## max.Accuracy.OOB max.Kappa.OOB
## MFO.myMFO_classfr 0.5359551 0.0000000
## Random.myrandom_classfr 0.4640449 0.0000000
## Max.cor.Y.cv.0.rpart 0.5359551 0.0000000
## Max.cor.Y.cv.0.cp.0.rpart 0.8202247 0.6403332
## Max.cor.Y.rpart 0.8426966 0.6791719
## Max.cor.Y.glm 0.8348315 0.6639612
## Interact.High.cor.Y.glm 0.8146067 0.6240496
## Low.cor.X.glm 0.7786517 0.5546405
## All.X.glm 0.7741573 0.5454499
## All.X.bayesglm 0.7842697 0.5654496
## All.X.glmnet 0.8325843 0.6580401
## All.X.no.rnorm.rpart 0.8426966 0.6791719
## All.X.no.rnorm.rf 0.8438202 0.6854548
## All.Interact.X.glm 0.6797753 0.3658021
## All.Interact.X.bayesglm 0.8179775 0.6319103
## All.Interact.X.glmnet 0.8359551 0.6661923
## All.Interact.X.no.rnorm.rpart 0.8426966 0.6791719
## All.Interact.X.no.rnorm.rf 0.8483146 0.6930078
## inv.elapsedtime.everything
## MFO.myMFO_classfr 2.72479564
## Random.myrandom_classfr 3.89105058
## Max.cor.Y.cv.0.rpart 1.67224080
## Max.cor.Y.cv.0.cp.0.rpart 2.12314225
## Max.cor.Y.rpart 1.04275287
## Max.cor.Y.glm 1.04712042
## Interact.High.cor.Y.glm 1.00908174
## Low.cor.X.glm 0.57438254
## All.X.glm 0.46040516
## All.X.bayesglm 0.30312216
## All.X.glmnet 0.14302059
## All.X.no.rnorm.rpart 0.54525627
## All.X.no.rnorm.rf 0.05078462
## All.Interact.X.glm 0.18625442
## All.Interact.X.bayesglm 0.15076134
## All.Interact.X.glmnet 0.06725854
## All.Interact.X.no.rnorm.rpart 0.40950041
## All.Interact.X.no.rnorm.rf 0.02492212
## inv.elapsedtime.final inv.aic.fit
## MFO.myMFO_classfr 333.33333333 NA
## Random.myrandom_classfr 500.00000000 NA
## Max.cor.Y.cv.0.rpart 83.33333333 NA
## Max.cor.Y.cv.0.cp.0.rpart 125.00000000 NA
## Max.cor.Y.rpart 83.33333333 NA
## Max.cor.Y.glm 76.92307692 1.131910e-03
## Interact.High.cor.Y.glm 66.66666667 1.126327e-03
## Low.cor.X.glm 2.56410256 1.093940e-03
## All.X.glm 1.97628458 1.073471e-03
## All.X.bayesglm 1.29533679 9.463638e-04
## All.X.glmnet 0.65659882 NA
## All.X.no.rnorm.rpart 10.41666667 NA
## All.X.no.rnorm.rf 0.13837000 NA
## All.Interact.X.glm 0.49776008 6.669419e-05
## All.Interact.X.bayesglm 0.43610990 8.588238e-04
## All.Interact.X.glmnet 0.53908356 NA
## All.Interact.X.no.rnorm.rpart 6.02409639 NA
## All.Interact.X.no.rnorm.rf 0.05698006 NA
# Print the myplot_radar() plot built from plt_models_df (SD/Upper/Lower columns
# removed, min.* metrics replaced by their inv.* reciprocals).
print(myplot_radar(radar_inp_df=plt_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 5 rows containing missing values (geom_path).
## Warning: Removed 175 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
# print(myplot_radar(radar_inp_df=subset(plt_models_df,
# !(model_id %in% grep("random|MFO", plt_models_df$model_id, value=TRUE)))))
# Compute CI for <metric>SD
# max.df:           max.nTuningRuns - 1 when there was more than one tuning run, else NA.
# min.sd2ci.scaler: 0.975 quantile of the t distribution with max.df degrees of
#                   freedom (NA when max.df is NA); multiplies each SD column below.
glb_models_df <- mutate(glb_models_df,
max.df = ifelse(max.nTuningRuns > 1, max.nTuningRuns - 1, NA),
min.sd2ci.scaler = ifelse(is.na(max.df), NA, qt(0.975, max.df)))
for (var in grep("SD", names(glb_models_df), value=TRUE)) {
# Does CI already exist ?
# Split the column name around "SD" to derive the matching value / Upper / Lower
# column names, e.g. max.KappaSD.fit -> max.Kappa.fit, max.KappaUpper.fit,
# max.KappaLower.fit
var_components <- unlist(strsplit(var, "SD"))
varActul <- paste0(var_components[1], var_components[2])
varUpper <- paste0(var_components[1], "Upper", var_components[2])
varLower <- paste0(var_components[1], "Lower", var_components[2])
if (varUpper %in% names(glb_models_df)) {
warning(varUpper, " already exists in glb_models_df")
# Assuming Lower also exists
next
}
print(sprintf("var:%s", var))
# CI is dependent on sample size in t distribution; df=n-1
# Upper / Lower = value +/- min.sd2ci.scaler * SD (NA where the scaler is NA)
glb_models_df[, varUpper] <- glb_models_df[, varActul] +
glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
glb_models_df[, varLower] <- glb_models_df[, varActul] -
glb_models_df[, "min.sd2ci.scaler"] * glb_models_df[, var]
}
## Warning: max.AccuracyUpper.fit already exists in glb_models_df
## [1] "var:max.KappaSD.fit"
# Plot metrics with CI
# plt_models_df:   model_id plus each metric column that has an "Upper" counterpart.
# pltCI_models_df: model_id plus the corresponding Upper and Lower columns.
plt_models_df <- glb_models_df[, "model_id", FALSE]
pltCI_models_df <- glb_models_df[, "model_id", FALSE]
for (var in grep("Upper", names(glb_models_df), value=TRUE)) {
# Removing "Upper" from the column name gives the name of the metric column itself
var_components <- unlist(strsplit(var, "Upper"))
col_name <- unlist(paste(var_components, collapse=""))
plt_models_df[, col_name] <- glb_models_df[, col_name]
# Copy both bounds: <prefix>Upper<suffix> and <prefix>Lower<suffix>
for (name in paste0(var_components[1], c("Upper", "Lower"), var_components[2]))
pltCI_models_df[, name] <- glb_models_df[, name]
}
# Melt a wide per-model metrics data frame into long form and split each metric
# column name into its two parts.
#
# Args:
#   plt_models_df: data frame with a "model_id" column plus metric columns named
#                  "<label>.<data>", e.g. "max.Accuracy.fit".
# Returns:
#   The melt(id.vars="model_id") result with two extra character columns:
#     data  - the text after the last "." of the metric name (e.g. "fit");
#     label - the metric name without that trailing ".<data>" (e.g. "max.Accuracy").
#   A metric name containing no "." is used unchanged for both columns.
build_statsCI_data <- function(plt_models_df) {
    mltd_models_df <- melt(plt_models_df, id.vars="model_id")
    var_names <- as.character(mltd_models_df$variable)

    # Vectorised over all rows, so a zero-row input yields zero-length columns
    # instead of failing on a 1:0 row index.
    mltd_models_df$data <- sub("^.*[.]", "", var_names)

    # Strip only the final ".<data>" suffix. Splitting on paste0(".", data) treated
    # the dot as a regex wildcard and cut names such as "max.benefit.fit" at "efit".
    mltd_models_df$label <- sub("[.][^.]*$", "", var_names)

    return(mltd_models_df)
}
# Long-format metric values (with label / data columns) and long-format CI bounds
mltd_models_df <- build_statsCI_data(plt_models_df)
mltdCI_models_df <- melt(pltCI_models_df, id.vars="model_id")
# For every melted CI row, split the column name on "Upper" or "Lower":
#   label = text before the match (e.g. "max.Accuracy")
#   data  = second element of the dot-split remainder (e.g. ".fit" -> "fit")
#   type  = whichever of "Upper" / "Lower" matched
for (row_ix in 1:nrow(mltdCI_models_df)) {
for (type in c("Upper", "Lower")) {
if (length(var_components <- unlist(strsplit(
as.character(mltdCI_models_df[row_ix, "variable"]), type))) > 1) {
#print(sprintf("row_ix:%d; type:%s; ", row_ix, type))
mltdCI_models_df[row_ix, "label"] <- var_components[1]
mltdCI_models_df[row_ix, "data"] <-
unlist(strsplit(var_components[2], "[.]"))[2]
mltdCI_models_df[row_ix, "type"] <- type
break
}
}
}
# Spread type into separate columns; the plot below reads them as value.Upper
# and value.Lower
wideCI_models_df <- reshape(subset(mltdCI_models_df, select=-variable),
timevar="type",
idvar=setdiff(names(mltdCI_models_df), c("type", "value", "variable")),
direction="wide")
#print(wideCI_models_df)
# Attach the metric value itself to each pair of bounds (merge on the shared columns)
mrgdCI_models_df <- merge(wideCI_models_df, mltd_models_df, all.x=TRUE)
#print(mrgdCI_models_df)
# Merge stats back in if CIs don't exist
# goback_vars: every <label>.<data> combination of the labels and data values seen
# so far that is not itself present in mltd_models_df
goback_vars <- c()
for (var in unique(mltd_models_df$label)) {
for (type in unique(mltd_models_df$data)) {
var_type <- paste0(var, ".", type)
# if this data is already present, next
if (var_type %in% unique(paste(mltd_models_df$label, mltd_models_df$data,
sep=".")))
next
#print(sprintf("var_type:%s", var_type))
goback_vars <- c(goback_vars, var_type)
}
}
# Pull those missing columns straight from glb_models_df and append them
if (length(goback_vars) > 0) {
mltd_goback_df <- build_statsCI_data(glb_models_df[, c("model_id", goback_vars)])
mltd_models_df <- rbind(mltd_models_df, mltd_goback_df)
}
# Add model_method, which is used as the bar colour below
mltd_models_df <- merge(mltd_models_df, glb_models_df[, c("model_id", "model_method")],
all.x=TRUE)
# Write the bar chart (error bars from mrgdCI_models_df, facets label ~ data) to
# <glb_out_pfx>models_bar.png; gp keeps the plot object for later use
png(paste0(glb_out_pfx, "models_bar.png"), width=480*3, height=480*2)
print(gp <- myplot_bar(mltd_models_df, "model_id", "value", colorcol_name="model_method") +
geom_errorbar(data=mrgdCI_models_df,
mapping=aes(x=model_id, ymax=value.Upper, ymin=value.Lower), width=0.5) +
facet_grid(label ~ data, scales="free") +
theme(axis.text.x = element_text(angle = 90,vjust = 0.5)))
dev.off()
## quartz_off_screen
## 2
# gp was already printed once between png() and dev.off(); print it again now that
# the png device has been closed
print(gp)
# used for console inspection
# Assemble the one-sided model-selection formula from glb_model_evl_criteria.
#
# Each criterion is preceded by "-" when its name contains "max" and by "+"
# otherwise. For binomial classification "- opt.prob.threshold.OOB" is appended.
# The callers in this file hand the result to orderBy() to rank glb_models_df.
#
# Returns:
#   A formula such as ~ -max.Accuracy.OOB + min.aic.fit - opt.prob.threshold.OOB
get_model_sel_frmla <- function() {
    signed_terms <- unlist(lapply(glb_model_evl_criteria, function(criterion) {
        sign_chr <- if (length(grep("max", criterion)) > 0) "-" else "+"
        c(sign_chr, criterion)
    }))

    if (glb_is_classification && glb_is_binomial) {
        signed_terms <- c(signed_terms, "-", "opt.prob.threshold.OOB")
    }

    frmla_txt <- paste(c("~ ", signed_terms), collapse=" ")
    return(as.formula(frmla_txt))
}
# Columns of the model-selection table: model_id, the evaluation criteria and,
# for binomial classification, opt.prob.threshold.OOB
dsp_models_cols <- c("model_id", glb_model_evl_criteria)
if (glb_is_classification && glb_is_binomial)
dsp_models_cols <- c(dsp_models_cols, "opt.prob.threshold.OOB")
# Order glb_models_df by the selection formula (also kept in model_sel_frmla) and
# print those columns; row 1 of dsp_models_df is later reported as the best model
print(dsp_models_df <- orderBy(model_sel_frmla <- get_model_sel_frmla(),
glb_models_df)[, dsp_models_cols])
## model_id max.Accuracy.OOB max.auc.OOB
## 18 All.Interact.X.no.rnorm.rf 0.8483146 0.9142644
## 13 All.X.no.rnorm.rf 0.8438202 0.9180131
## 5 Max.cor.Y.rpart 0.8426966 0.8469855
## 12 All.X.no.rnorm.rpart 0.8426966 0.8469855
## 17 All.Interact.X.no.rnorm.rpart 0.8426966 0.8469855
## 16 All.Interact.X.glmnet 0.8359551 0.8742088
## 6 Max.cor.Y.glm 0.8348315 0.8659702
## 11 All.X.glmnet 0.8325843 0.8560007
## 4 Max.cor.Y.cv.0.cp.0.rpart 0.8202247 0.8997924
## 15 All.Interact.X.bayesglm 0.8179775 0.8660362
## 7 Interact.High.cor.Y.glm 0.8146067 0.8576352
## 10 All.X.bayesglm 0.7842697 0.8427064
## 8 Low.cor.X.glm 0.7786517 0.8382546
## 9 All.X.glm 0.7741573 0.8308232
## 14 All.Interact.X.glm 0.6797753 0.6856640
## 1 MFO.myMFO_classfr 0.5359551 0.5000000
## 3 Max.cor.Y.cv.0.rpart 0.5359551 0.5000000
## 2 Random.myrandom_classfr 0.4640449 0.5185354
## max.Kappa.OOB min.aic.fit opt.prob.threshold.OOB
## 18 0.6930078 NA 0.6
## 13 0.6854548 NA 0.5
## 5 0.6791719 NA 0.9
## 12 0.6791719 NA 0.9
## 17 0.6791719 NA 0.9
## 16 0.6661923 NA 0.6
## 6 0.6639612 883.4623 0.7
## 11 0.6580401 NA 0.7
## 4 0.6403332 NA 0.3
## 15 0.6319103 1164.3831 0.5
## 7 0.6240496 887.8417 0.6
## 10 0.5654496 1056.6761 0.5
## 8 0.5546405 914.1270 0.5
## 9 0.5454499 931.5575 0.5
## 14 0.3658021 14993.8106 0.9
## 1 0.0000000 NA 0.5
## 3 0.0000000 NA 0.5
## 2 0.0000000 NA 0.4
# Print the myplot_radar() plot for the ordered model-selection table
print(myplot_radar(radar_inp_df=dsp_models_df))
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
## Warning: Removed 75 rows containing missing values (geom_point).
## Warning: Removed 11 rows containing missing values (geom_text).
## Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
## Returning the palette you asked for with that many colors
## Warning: The shape palette can deal with a maximum of 6 discrete values
## because more than 6 becomes difficult to discriminate; you have
## 18. Consider specifying shapes manually if you must have them.
# Show the formula that was passed to orderBy() to rank the models
print("Metrics used for model selection:"); print(model_sel_frmla)
## [1] "Metrics used for model selection:"
## ~-max.Accuracy.OOB - max.auc.OOB - max.Kappa.OOB + min.aic.fit -
## opt.prob.threshold.OOB
## <environment: 0x7fa4be8b28e8>
# The first row of the ordered table is reported as the best model
print(sprintf("Best model id: %s", dsp_models_df[1, "model_id"]))
## [1] "Best model id: All.Interact.X.no.rnorm.rf"
# Add one model's predictions to a data frame.
#
# Args:
#   df:                 observations to predict on; for regression and binomial
#                       classification it must also contain the glb_rsp_var column.
#   mdl_id:             name of the model in glb_models_lst; for binomial
#                       classification also the model_id row of glb_models_df that
#                       supplies opt.prob.threshold.OOB.
#   rsp_var_out:        prefix of the prediction column name; mdl_id is appended.
#   prob_threshold_def: fallback probability threshold used when glb_models_df has
#                       no usable opt.prob.threshold.OOB for mdl_id.
# Returns:
#   df with the prediction column <rsp_var_out><mdl_id> added, plus
#   <rsp_var_out><mdl_id>.err (regression) or <rsp_var_out><mdl_id>.prob
#   (classification).
# Side effects:
#   Regression prints a myplot_scatter() plot and the head() of df ordered on the
#   .err column.
# Raises:
#   An error when the binomial threshold is missing and prob_threshold_def is NULL.
glb_get_predictions <- function(df, mdl_id, rsp_var_out, prob_threshold_def=NULL) {
    mdl <- glb_models_lst[[mdl_id]]
    rsp_var_out <- paste0(rsp_var_out, mdl_id)

    if (glb_is_regression) {
        df[, rsp_var_out] <- predict(mdl, newdata=df, type="raw")
        print(myplot_scatter(df, glb_rsp_var, rsp_var_out, smooth=TRUE))
        df[, paste0(rsp_var_out, ".err")] <-
            abs(df[, rsp_var_out] - df[, glb_rsp_var])
        print(head(orderBy(reformulate(c("-", paste0(rsp_var_out, ".err"))),
                           df)))
    }

    if (glb_is_classification && glb_is_binomial) {
        prob_threshold <- glb_models_df[glb_models_df$model_id == mdl_id,
                                        "opt.prob.threshold.OOB"]
        # A model with no row in glb_models_df yields a zero-length threshold.
        # Test the length before is.na(), otherwise `||` / if() fail on logical(0)
        # and the default threshold is never used.
        if (is.null(prob_threshold) || (length(prob_threshold) == 0) ||
            is.na(prob_threshold)) {
            warning("Using default probability threshold: ", prob_threshold_def)
            if (is.null(prob_threshold <- prob_threshold_def))
                stop("Default probability threshold is NULL")
        }

        # Column 2 of the "prob" prediction is used as the predicted probability
        df[, paste0(rsp_var_out, ".prob")] <-
            predict(mdl, newdata=df, type="prob")[, 2]
        # prob >= threshold picks the 2nd level of glb_rsp_var, otherwise the 1st
        df[, rsp_var_out] <-
            factor(levels(df[, glb_rsp_var])[
                (df[, paste0(rsp_var_out, ".prob")] >=
                     prob_threshold) * 1 + 1], levels(df[, glb_rsp_var]))

        # prediction stats already reported by myfit_mdl ???
    }

    if (glb_is_classification && !glb_is_binomial) {
        df[, rsp_var_out] <- predict(mdl, newdata=df, type="raw")
        # NOTE(review): type="prob" presumably returns one column per class, yet it
        # is assigned to a single column name here -- confirm this is intended.
        df[, paste0(rsp_var_out, ".prob")] <-
            predict(mdl, newdata=df, type="prob")
    }

    return(df)
}
# Snapshot the main glb_* objects into sav_* copies (debugging aid).
#
# The five observation data frames are always copied. glb_models_lst,
# glb_models_df and glb_feats_df are copied when they are not NULL.
# glb_featsimp_df is copied when it exists and is not NULL.
# All copies are made with `<<-`, so they land outside the function.
glb_to_sav <- function() {
    sav_allobs_df <<- glb_allobs_df
    sav_trnobs_df <<- glb_trnobs_df
    sav_fitobs_df <<- glb_fitobs_df
    sav_OOBobs_df <<- glb_OOBobs_df
    sav_newobs_df <<- glb_newobs_df

    if (!is.null(glb_models_lst)) sav_models_lst <<- glb_models_lst
    if (!is.null(glb_models_df))  sav_models_df  <<- glb_models_df
    if (!is.null(glb_feats_df))   sav_feats_df   <<- glb_feats_df

    # exists() searches the enclosing environments. The previous check called ls()
    # inside the function (which lists only the empty local frame) and looked for
    # the misspelled name "glb_fitsimp_df", so this copy was never made.
    if (exists("glb_featsimp_df") && !is.null(glb_featsimp_df))
        sav_featsimp_df <<- glb_featsimp_df
}
#stop(here"); glb_to_sav(); glb_allobs_df <- sav_allobs_df; glb_trnobs_df <- sav_trnobs_df; glb_fitobs_df <- sav_fitobs_df; glb_OOBobs_df <- sav_OOBobs_df; sav_models_df <- glb_models_df; glb_models_df <- sav_models_df; glb_featsimp_df <- sav_featsimp_df
# When "Ensemble.glmnet" is the selected model id, fit a glmnet model on the
# predictions of every model that ranks above the first MFO / Baseline model.
#
# `&&` is required in the guard: with `&`, a NULL glb_sel_mdl_id makes the whole
# condition logical(0) and if() fails with "argument is of length zero".
if (!is.null(glb_sel_mdl_id) && (glb_sel_mdl_id == "Ensemble.glmnet")) {
    if (#(glb_is_regression) |
        (glb_is_classification && !glb_is_binomial))
        stop("Ensemble models not implemented yet for multinomial classification")

    # Rank the models with the same formula used for model selection
    tmp_models_df <- orderBy(get_model_sel_frmla(), glb_models_df)
    row.names(tmp_models_df) <- tmp_models_df$model_id

    # Number of models ranked above the first MFO / Baseline model. When no such
    # model exists every model is used (min() of an empty set would be Inf).
    baseline_pos <- grep("MFO|Baseline", tmp_models_df$model_id)
    mdl_threshold_pos <-
        if (length(baseline_pos) > 0) min(baseline_pos) - 1 else nrow(tmp_models_df)

    # head(x, 0) is empty, whereas x[1:0] would still pick the first model
    for (model_id in head(tmp_models_df$model_id, mdl_threshold_pos)) {
        glb_fitobs_df <- glb_get_predictions(df=glb_fitobs_df, model_id, glb_rsp_var_out)
        glb_OOBobs_df <- glb_get_predictions(df=glb_OOBobs_df, model_id, glb_rsp_var_out)
    }

    model_id <- "Ensemble"; method <- "glmnet";

    # Ensemble inputs: the prediction columns just added to glb_fitobs_df.
    # Binomial classification uses the ".prob" columns; otherwise every prediction
    # column except the ".err" ones.
    indep_vars_vctr <- grep(glb_rsp_var_out, names(glb_fitobs_df), fixed=TRUE, value=TRUE)
    if (glb_is_classification && glb_is_binomial)
        indep_vars_vctr <- grep("prob$", indep_vars_vctr, value=TRUE)
    else
        indep_vars_vctr <- indep_vars_vctr[!grepl("err$", indep_vars_vctr)]

    ret_lst <- myfit_mdl(model_id=model_id, model_method=method,
                         indep_vars_vctr=indep_vars_vctr,
                         model_type=glb_model_type,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_fitobs_df, OOB_df=glb_OOBobs_df,
                         n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)
}
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type =
## ifelse(type == : prediction from a rank-deficient fit may be misleading
## [1] "fitting model: Ensemble.glmnet"
## [1] " indep_vars: sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob, sold.fctr.predict.All.X.no.rnorm.rf.prob, sold.fctr.predict.Max.cor.Y.rpart.prob, sold.fctr.predict.All.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.glm.prob, sold.fctr.predict.All.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob, sold.fctr.predict.All.Interact.X.bayesglm.prob, sold.fctr.predict.Interact.High.cor.Y.glm.prob, sold.fctr.predict.All.X.bayesglm.prob, sold.fctr.predict.Low.cor.X.glm.prob, sold.fctr.predict.All.X.glm.prob, sold.fctr.predict.All.Interact.X.glm.prob"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.55, lambda = 0.0972 on full training set
## Warning in myfit_mdl(model_id = model_id, model_method = method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda
## Length Class Mode
## a0 97 -none- numeric
## beta 1455 dgCMatrix S4
## df 97 -none- numeric
## dim 2 -none- numeric
## lambda 97 -none- numeric
## dev.ratio 97 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 15 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -2.5446998
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 2.3378403
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 2.3819269
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 0.1783523
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 0.1199788
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -9.90797572
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 11.78238766
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 12.45455190
## sold.fctr.predict.Max.cor.Y.rpart.prob
## -0.80850918
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## -0.81784589
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## -0.80275250
## sold.fctr.predict.All.Interact.X.glmnet.prob
## -1.23144839
## sold.fctr.predict.All.X.glmnet.prob
## -0.46608208
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## -0.03955328
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## -0.08909699
## sold.fctr.predict.All.X.bayesglm.prob
## -0.30426435
## sold.fctr.predict.Low.cor.X.glm.prob
## -0.06816013
## character(0)
## character(0)
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6313559
## 2 0.1 0.8216912
## 3 0.2 0.9664865
## 4 0.3 0.9845815
## 5 0.4 1.0000000
## 6 0.5 1.0000000
## 7 0.6 1.0000000
## 8 0.7 0.9735936
## 9 0.8 0.9044118
## 10 0.9 0.7685950
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.6000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Ensemble.glmnet.N
## 1 N 522
## 2 Y NA
## sold.fctr.predict.Ensemble.glmnet.Y
## 1 NA
## 2 447
## Prediction
## Reference N Y
## N 522 0
## Y 0 447
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.000000e+00 1.000000e+00 9.962003e-01 1.000000e+00 5.386997e-01
## AccuracyPValue McnemarPValue
## 4.731267e-261 NaN
## [1] " calling mypredict_mdl for OOB:"
## threshold f.score
## 1 0.0 0.6339217
## 2 0.1 0.7431694
## 3 0.2 0.7945205
## 4 0.3 0.8103837
## 5 0.4 0.8217237
## 6 0.5 0.8314883
## 7 0.6 0.8259212
## 8 0.7 0.8211921
## 9 0.8 0.7859155
## 10 0.9 0.6473430
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.OOB"
## sold.fctr sold.fctr.predict.Ensemble.glmnet.N
## 1 N 415
## 2 Y 75
## sold.fctr.predict.Ensemble.glmnet.Y
## 1 62
## 2 338
## Prediction
## Reference N Y
## N 415 62
## Y 75 338
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.460674e-01 6.898797e-01 8.206560e-01 8.691620e-01 5.359551e-01
## AccuracyPValue McnemarPValue
## 1.053824e-85 3.052551e-01
## model_id model_method
## 1 Ensemble.glmnet glmnet
## feats
## 1 sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob, sold.fctr.predict.All.X.no.rnorm.rf.prob, sold.fctr.predict.Max.cor.Y.rpart.prob, sold.fctr.predict.All.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob, sold.fctr.predict.All.Interact.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.glm.prob, sold.fctr.predict.All.X.glmnet.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob, sold.fctr.predict.All.Interact.X.bayesglm.prob, sold.fctr.predict.Interact.High.cor.Y.glm.prob, sold.fctr.predict.All.X.bayesglm.prob, sold.fctr.predict.Low.cor.X.glm.prob, sold.fctr.predict.All.X.glm.prob, sold.fctr.predict.All.Interact.X.glm.prob
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 9 1.457 0.042
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.6 1 1
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit max.auc.OOB
## 1 0.9962003 1 1 0.9183253
## opt.prob.threshold.OOB max.f.score.OOB max.Accuracy.OOB
## 1 0.5 0.8314883 0.8460674
## max.AccuracyLower.OOB max.AccuracyUpper.OOB max.Kappa.OOB
## 1 0.820656 0.869162 0.6898797
## max.AccuracySD.fit max.KappaSD.fit
## 1 0 0
# Report the user's model choice if there is one; otherwise fall back to the
# model in the first row of dsp_models_df.
if (!is.null(glb_sel_mdl_id)) {
    print(sprintf("User specified selection: %s", glb_sel_mdl_id))
} else {
    glb_sel_mdl_id <- dsp_models_df[1, "model_id"]
}
## [1] "User specified selection: Ensemble.glmnet"
# Look up the selected model object in the fitted-models list, then print it.
glb_sel_mdl <- glb_models_lst[[glb_sel_mdl_id]]
myprint_mdl(glb_sel_mdl)
## Length Class Mode
## a0 97 -none- numeric
## beta 1455 dgCMatrix S4
## df 97 -none- numeric
## dim 2 -none- numeric
## lambda 97 -none- numeric
## dev.ratio 97 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 15 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -2.5446998
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 2.3378403
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 2.3819269
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 0.1783523
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 0.1199788
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -9.90797572
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 11.78238766
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 12.45455190
## sold.fctr.predict.Max.cor.Y.rpart.prob
## -0.80850918
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## -0.81784589
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## -0.80275250
## sold.fctr.predict.All.Interact.X.glmnet.prob
## -1.23144839
## sold.fctr.predict.All.X.glmnet.prob
## -0.46608208
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## -0.03955328
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## -0.08909699
## sold.fctr.predict.All.X.bayesglm.prob
## -0.30426435
## sold.fctr.predict.Low.cor.X.glm.prob
## -0.06816013
## character(0)
## character(0)
## [1] TRUE
# Score the OOB observations with the selected model, flag which predictions
# match the actual response, and collect the model's feature importances.
# NOTE: everything from here to save() should eventually be one function; the
# same sequence is executed twice more, in the fit.data.training and
# predict.data.new chunks.

# Column names derived from the prediction prefix and the selected model id
predct_accurate_var_name <- paste0(glb_rsp_var_out, glb_sel_mdl_id, ".accurate")
predct_error_var_name    <- paste0(glb_rsp_var_out, glb_sel_mdl_id, ".err")

glb_OOBobs_df <- glb_get_predictions(df=glb_OOBobs_df,
                                     mdl_id=glb_sel_mdl_id,
                                     rsp_var_out=glb_rsp_var_out)

# TRUE where the actual response equals the selected model's prediction
glb_OOBobs_df[, predct_accurate_var_name] <- (
    glb_OOBobs_df[, glb_rsp_var] ==
        glb_OOBobs_df[, paste0(glb_rsp_var_out, glb_sel_mdl_id)]
)

# Keep a model-specific copy of the importance column next to the generic one
glb_featsimp_df <- myget_feats_importance(mdl=glb_sel_mdl, featsimp_df=NULL)
glb_featsimp_df[, paste0(glb_sel_mdl_id, ".importance")] <-
    glb_featsimp_df[, "importance"]
print(glb_featsimp_df)
## importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob 4.587832
## sold.fctr.predict.Max.cor.Y.rpart.prob 0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob 0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob 0.000000
## sold.fctr.predict.All.X.glmnet.prob 0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob 0.000000
## sold.fctr.predict.All.X.bayesglm.prob 0.000000
## sold.fctr.predict.Low.cor.X.glm.prob 0.000000
## sold.fctr.predict.All.X.glm.prob 0.000000
## sold.fctr.predict.All.Interact.X.glm.prob 0.000000
## Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob 4.587832
## sold.fctr.predict.Max.cor.Y.rpart.prob 0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob 0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob 0.000000
## sold.fctr.predict.All.X.glmnet.prob 0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob 0.000000
## sold.fctr.predict.All.X.bayesglm.prob 0.000000
## sold.fctr.predict.Low.cor.X.glm.prob 0.000000
## sold.fctr.predict.All.X.glm.prob 0.000000
## sold.fctr.predict.All.Interact.X.glm.prob 0.000000
# Used again in fit.data.training & predict.data.new chunks
#
# Print diagnostic plots for a model's predictions against its most important
# features.
#
# Args:
#   obs_df:         data frame of observations; melted on each plotted feature
#                   with the actual response (glb_rsp_var) and the model's
#                   prediction column as measure variables.
#   mdl_id:         model id; appended to glb_rsp_var_out to form the name of
#                   the prediction column.
#   prob_threshold: passed through to myplot_prediction_classification only;
#                   defaults to NULL.
#
# Reads the globals glb_featsimp_df, glb_rsp_var, glb_rsp_var_out, glb_id_var,
# glb_is_regression and glb_is_classification. All output is produced via
# print() / warning().
glb_analytics_diag_plots <- function(obs_df, mdl_id, prob_threshold=NULL) {
# Work on a local copy of the global feature-importance table
featsimp_df <- glb_featsimp_df
# Strip back-ticks surrounding quoted term names
featsimp_df$feat <- gsub("`(.*?)`", "\\1", row.names(featsimp_df))
# Split "a:b" terms: text after the first colon goes to feat.interact,
# text before it stays in feat
featsimp_df$feat.interact <- gsub("(.*?):(.*)", "\\2", featsimp_df$feat)
featsimp_df$feat <- gsub("(.*?):(.*)", "\\1", featsimp_df$feat)
# When both parts are equal (as they are when the name held no colon) there is
# no interaction term
featsimp_df$feat.interact <- ifelse(featsimp_df$feat.interact == featsimp_df$feat,
NA, featsimp_df$feat.interact)
# Drop whatever follows ".fctr" so that such terms share one feature name
featsimp_df$feat <- gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat)
featsimp_df$feat.interact <- gsub("(.*?)\\.fctr(.*)", "\\1\\.fctr", featsimp_df$feat.interact)
# One row per (feat, feat.interact) with its maximum importance, highest first
featsimp_df <- orderBy(~ -importance.max, summaryBy(importance ~ feat + feat.interact,
data=featsimp_df, FUN=max))
#rex_str=":(.*)"; txt_vctr=tail(featsimp_df$feat); ret_lst <- regexec(rex_str, txt_vctr); ret_lst <- regmatches(txt_vctr, ret_lst); ret_vctr <- sapply(1:length(ret_lst), function(pos_ix) ifelse(length(ret_lst[[pos_ix]]) > 0, ret_lst[[pos_ix]], "")); print(ret_vctr <- ret_vctr[ret_vctr != ""])
# Only the five most important features get scatter plots
if (nrow(featsimp_df) > 5) {
warning("Limiting important feature scatter plots to 5 out of ", nrow(featsimp_df))
featsimp_df <- head(featsimp_df, 5)
}
# if (!all(is.na(featsimp_df$feat.interact)))
# stop("not implemented yet")
# Name of the prediction column for this model
rsp_var_out <- paste0(glb_rsp_var_out, mdl_id)
# One scatter plot per feature, with actual and predicted response in
# separate facets
for (var in featsimp_df$feat) {
plot_df <- melt(obs_df, id.vars=var,
measure.vars=c(glb_rsp_var, rsp_var_out))
# if (var == "<feat_name>") print(myplot_scatter(plot_df, var, "value",
# facet_colcol_name="variable") +
# geom_vline(xintercept=<divider_val>, linetype="dotted")) else
print(myplot_scatter(plot_df, var, "value", colorcol_name="variable",
facet_colcol_name="variable", jitter=TRUE) +
guides(color=FALSE))
}
# Prediction plot for regression: top feature as feat_y, second feature (or
# ".rownames" when there is only one) as feat_x
if (glb_is_regression) {
if (nrow(featsimp_df) == 0)
warning("No important features in glb_fin_mdl") else
print(myplot_prediction_regression(df=obs_df,
feat_x=ifelse(nrow(featsimp_df) > 1, featsimp_df$feat[2],
".rownames"),
feat_y=featsimp_df$feat[1],
rsp_var=glb_rsp_var, rsp_var_out=rsp_var_out,
id_vars=glb_id_var)
# + facet_wrap(reformulate(featsimp_df$feat[2])) # if [1 or 2] is a factor
# + geom_point(aes_string(color="<col_name>.fctr")) # to color the plot
)
}
# Prediction plot for classification: same feature choice as above, plus the
# probability threshold
if (glb_is_classification) {
if (nrow(featsimp_df) == 0)
warning("No features in selected model are statistically important")
else print(myplot_prediction_classification(df=obs_df,
feat_x=ifelse(nrow(featsimp_df) > 1, featsimp_df$feat[2],
".rownames"),
feat_y=featsimp_df$feat[1],
rsp_var=glb_rsp_var,
rsp_var_out=rsp_var_out,
id_vars=glb_id_var,
prob_threshold=prob_threshold)
# + geom_hline(yintercept=<divider_val>, linetype = "dotted")
)
}
}
# Diagnostic plots for the OOB observations of the selected model. For binomial
# classification the model's OOB-optimal probability threshold is supplied;
# in every other case the threshold is NULL (the function's default).
glb_analytics_diag_plots(
    obs_df=glb_OOBobs_df,
    mdl_id=glb_sel_mdl_id,
    prob_threshold=if (glb_is_classification && glb_is_binomial)
        glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                      "opt.prob.threshold.OOB"] else NULL)
## [1] "Min/Max Boundaries: "
## UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 5 10005 N 0.17
## 1859 11861 N 0.34
## sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 5 N
## 1859 N
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 5 0.222
## 1859 0.260
## sold.fctr.predict.All.X.no.rnorm.rf
## 5 N
## 1859 N
## sold.fctr.predict.Max.cor.Y.rpart.prob
## 5 0.2115028
## 1859 0.2115028
## sold.fctr.predict.Max.cor.Y.rpart
## 5 N
## 1859 N
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 5 0.2115028
## 1859 0.2115028
## sold.fctr.predict.All.X.no.rnorm.rpart
## 5 N
## 1859 N
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 5 0.2115028
## 1859 0.2115028
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 5 N
## 1859 N
## sold.fctr.predict.All.Interact.X.glmnet.prob
## 5 0.2589963
## 1859 0.2752168
## sold.fctr.predict.All.Interact.X.glmnet
## 5 N
## 1859 N
## sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 5 0.4107302 N
## 1859 0.2427831 N
## sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 5 0.2903361 N
## 1859 0.2841722 N
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 5 0.1015625
## 1859 0.7777778
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 5 N
## 1859 Y
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 5 0.009594626
## 1859 0.772431246
## sold.fctr.predict.All.Interact.X.bayesglm
## 5 N
## 1859 Y
## sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 5 0.4075147
## 1859 0.2427705
## sold.fctr.predict.Interact.High.cor.Y.glm
## 5 N
## 1859 N
## sold.fctr.predict.All.X.bayesglm.prob
## 5 0.06255101
## 1859 0.91073392
## sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 5 N 0.01473314
## 1859 Y 0.99999996
## sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 5 N 0.04705504
## 1859 Y 0.99999997
## sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 5 N 2.220446e-16
## 1859 Y 1.000000e+00
## sold.fctr.predict.All.Interact.X.glm
## 5 N
## 1859 Y
## sold.fctr.predict.Ensemble.glmnet.prob
## 5 0.1626605
## 1859 0.2826691
## sold.fctr.predict.Ensemble.glmnet
## 5 N
## 1859 N
## sold.fctr.predict.Ensemble.glmnet.accurate
## 5 TRUE
## 1859 TRUE
## sold.fctr.predict.Ensemble.glmnet.error .label
## 5 0 10005
## 1859 0 11861
## [1] "Inaccurate: "
## UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 1447 11448 Y 0.000
## 991 10991 Y 0.002
## 1582 11583 Y 0.008
## 834 10834 Y 0.012
## 962 10962 Y 0.040
## 1589 11590 Y 0.080
## sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 1447 0.000
## 991 0.016
## 1582 0.030
## 834 0.112
## 962 0.110
## 1589 0.088
## sold.fctr.predict.All.X.no.rnorm.rf
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.Max.cor.Y.rpart.prob
## 1447 0.2115028
## 991 0.2115028
## 1582 0.2115028
## 834 0.2115028
## 962 0.2115028
## 1589 0.2115028
## sold.fctr.predict.Max.cor.Y.rpart
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 1447 0.2115028
## 991 0.2115028
## 1582 0.2115028
## 834 0.2115028
## 962 0.2115028
## 1589 0.2115028
## sold.fctr.predict.All.X.no.rnorm.rpart
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 1447 0.2115028
## 991 0.2115028
## 1582 0.2115028
## 834 0.2115028
## 962 0.2115028
## 1589 0.2115028
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.Interact.X.glmnet.prob
## 1447 0.3463592
## 991 0.2592719
## 1582 0.3293101
## 834 0.2557878
## 962 0.3138472
## 1589 0.2401187
## sold.fctr.predict.All.Interact.X.glmnet
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 1447 0.5774744 N
## 991 0.1375672 N
## 1582 0.4999143 N
## 834 0.1289347 N
## 962 0.4277115 N
## 1589 0.1408025 N
## sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 1447 0.4342911 N
## 991 0.2403315 N
## 1582 0.3973766 N
## 834 0.2337584 N
## 962 0.3691777 N
## 1589 0.2186695 N
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 1447 0.10156250
## 991 0.04191617
## 1582 0.10156250
## 834 0.04191617
## 962 0.10156250
## 1589 0.04191617
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 1447 0.1818562
## 991 0.1584976
## 1582 0.1505292
## 834 0.2988306
## 962 0.1508337
## 1589 0.1763513
## sold.fctr.predict.All.Interact.X.bayesglm
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 1447 0.5714177
## 991 0.1389766
## 1582 0.4950528
## 834 0.1304135
## 962 0.4241684
## 1589 0.1421834
## sold.fctr.predict.Interact.High.cor.Y.glm
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.All.X.bayesglm.prob
## 1447 0.42148585
## 991 0.10512853
## 1582 0.32530444
## 834 0.19434095
## 962 0.30871512
## 1589 0.07684669
## sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 1447 N 0.39474523
## 991 N 0.08233980
## 1582 N 0.30227038
## 834 N 0.15101069
## 962 N 0.35520389
## 1589 N 0.07338957
## sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 1447 N 0.40643422
## 991 N 0.08679229
## 1582 N 0.30697032
## 834 N 0.16182543
## 962 N 0.30484639
## 1589 N 0.07398338
## sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 1447 N 2.220446e-16
## 991 N 2.220446e-16
## 1582 N 2.220446e-16
## 834 N 1.000000e+00
## 962 N 2.220446e-16
## 1589 N 2.220446e-16
## sold.fctr.predict.All.Interact.X.glm
## 1447 N
## 991 N
## 1582 N
## 834 Y
## 962 N
## 1589 N
## sold.fctr.predict.Ensemble.glmnet.prob
## 1447 0.07069188
## 991 0.07277253
## 1582 0.07681084
## 834 0.09375822
## 962 0.09865429
## 1589 0.10181302
## sold.fctr.predict.Ensemble.glmnet
## 1447 N
## 991 N
## 1582 N
## 834 N
## 962 N
## 1589 N
## sold.fctr.predict.Ensemble.glmnet.accurate
## 1447 FALSE
## 991 FALSE
## 1582 FALSE
## 834 FALSE
## 962 FALSE
## 1589 FALSE
## sold.fctr.predict.Ensemble.glmnet.error
## 1447 -0.4293081
## 991 -0.4272275
## 1582 -0.4231892
## 834 -0.4062418
## 962 -0.4013457
## 1589 -0.3981870
## UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 991 10991 Y 0.002
## 19 10019 Y 0.340
## 1107 11107 Y 0.530
## 1541 11542 N 0.640
## 1470 11471 N 0.766
## 296 10296 N 0.986
## sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 991 N
## 19 N
## 1107 N
## 1541 Y
## 1470 Y
## 296 Y
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 991 0.016
## 19 0.330
## 1107 0.420
## 1541 0.488
## 1470 0.782
## 296 0.988
## sold.fctr.predict.All.X.no.rnorm.rf
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.Max.cor.Y.rpart.prob
## 991 0.2115028
## 19 0.2115028
## 1107 0.2115028
## 1541 0.2115028
## 1470 0.9228571
## 296 0.9228571
## sold.fctr.predict.Max.cor.Y.rpart
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 991 0.2115028
## 19 0.2115028
## 1107 0.2115028
## 1541 0.2115028
## 1470 0.9228571
## 296 0.9228571
## sold.fctr.predict.All.X.no.rnorm.rpart
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 991 0.2115028
## 19 0.2115028
## 1107 0.2115028
## 1541 0.2115028
## 1470 0.9228571
## 296 0.9228571
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.All.Interact.X.glmnet.prob
## 991 0.2592719
## 19 0.2758950
## 1107 0.2791061
## 1541 0.2525105
## 1470 0.9261020
## 296 0.7328073
## sold.fctr.predict.All.Interact.X.glmnet
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 991 0.1375672 N
## 19 0.1842910 N
## 1107 0.2577252 N
## 1541 0.1211751 N
## 1470 0.9396433 Y
## 296 0.8073492 Y
## sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 991 0.2403315 N
## 19 0.2725672 N
## 1107 0.2921147 N
## 1541 0.2276382 N
## 1470 0.8743624 Y
## 296 0.7269362 Y
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 991 0.04191617
## 19 0.53333333
## 1107 0.13333333
## 1541 0.04191617
## 1470 0.94171779
## 296 0.94171779
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 991 N
## 19 Y
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 991 0.1584976
## 19 0.2432798
## 1107 0.1954608
## 1541 0.3906655
## 1470 0.9995656
## 296 0.9023783
## sold.fctr.predict.All.Interact.X.bayesglm
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 991 0.1389766
## 19 0.1851791
## 1107 0.2574524
## 1541 0.1227075
## 1470 0.9530526
## 296 0.8319335
## sold.fctr.predict.Interact.High.cor.Y.glm
## 991 N
## 19 N
## 1107 N
## 1541 N
## 1470 Y
## 296 Y
## sold.fctr.predict.All.X.bayesglm.prob
## 991 0.1051285
## 19 0.1878507
## 1107 0.3366319
## 1541 0.2572607
## 1470 0.9942701
## 296 0.8511794
## sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 991 N 0.0823398
## 19 N 0.1796292
## 1107 N 0.4095530
## 1541 N 0.2354375
## 1470 Y 0.9911401
## 296 Y 0.8569773
## sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 991 N 0.08679229
## 19 N 0.17948604
## 1107 N 0.40596515
## 1541 N 0.25204312
## 1470 Y 0.99569308
## 296 Y 0.86092054
## sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 991 N 2.220446e-16
## 19 N 1.000000e+00
## 1107 N 2.220446e-16
## 1541 N 1.000000e+00
## 1470 Y 1.000000e+00
## 296 Y 1.000000e+00
## sold.fctr.predict.All.Interact.X.glm
## 991 N
## 19 Y
## 1107 N
## 1541 Y
## 1470 Y
## 296 Y
## sold.fctr.predict.Ensemble.glmnet.prob
## 991 0.07277253
## 19 0.29712381
## 1107 0.43653608
## 1541 0.54588029
## 1470 0.80790361
## 296 0.92156888
## sold.fctr.predict.Ensemble.glmnet
## 991 N
## 19 N
## 1107 N
## 1541 Y
## 1470 Y
## 296 Y
## sold.fctr.predict.Ensemble.glmnet.accurate
## 991 FALSE
## 19 FALSE
## 1107 FALSE
## 1541 FALSE
## 1470 FALSE
## 296 FALSE
## sold.fctr.predict.Ensemble.glmnet.error
## 991 -0.42722747
## 19 -0.20287619
## 1107 -0.06346392
## 1541 0.04588029
## 1470 0.30790361
## 296 0.42156888
## UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 472 10472 N 0.986
## 841 10841 N 0.970
## 955 10955 N 0.992
## 199 10199 N 0.970
## 296 10296 N 0.986
## 416 10416 N 1.000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 472 0.932
## 841 0.978
## 955 0.960
## 199 0.984
## 296 0.988
## 416 0.994
## sold.fctr.predict.All.X.no.rnorm.rf
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.Max.cor.Y.rpart.prob
## 472 0.9228571
## 841 0.9228571
## 955 0.9228571
## 199 0.9228571
## 296 0.9228571
## 416 0.9228571
## sold.fctr.predict.Max.cor.Y.rpart
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 472 0.9228571
## 841 0.9228571
## 955 0.9228571
## 199 0.9228571
## 296 0.9228571
## 416 0.9228571
## sold.fctr.predict.All.X.no.rnorm.rpart
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 472 0.9228571
## 841 0.9228571
## 955 0.9228571
## 199 0.9228571
## 296 0.9228571
## 416 0.9228571
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.Interact.X.glmnet.prob
## 472 0.6889045
## 841 0.7587202
## 955 0.7890379
## 199 0.8058413
## 296 0.7328073
## 416 0.7922638
## sold.fctr.predict.All.Interact.X.glmnet
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 472 0.7556156 Y
## 841 0.8296527 Y
## 955 0.8525572 Y
## 199 0.8704164 Y
## 296 0.8073492 Y
## 416 0.8446071 Y
## sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 472 0.7229457 Y
## 841 0.7392313 Y
## 955 0.7568769 Y
## 199 0.7608577 Y
## 296 0.7269362 Y
## 416 0.7744068 Y
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 472 0.8235294
## 841 0.9417178
## 955 0.9417178
## 199 0.9417178
## 296 0.9417178
## 416 0.9417178
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 472 0.8486180
## 841 0.9777102
## 955 0.9458590
## 199 0.9472238
## 296 0.9023783
## 416 0.9751765
## sold.fctr.predict.All.Interact.X.bayesglm
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 472 0.7859171
## 841 0.7496888
## 955 0.8846367
## 199 0.8631023
## 296 0.8319335
## 416 0.8647130
## sold.fctr.predict.Interact.High.cor.Y.glm
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.All.X.bayesglm.prob sold.fctr.predict.All.X.bayesglm
## 472 0.8540818 Y
## 841 0.8985471 Y
## 955 0.8917715 Y
## 199 0.9209272 Y
## 296 0.8511794 Y
## 416 0.9187159 Y
## sold.fctr.predict.Low.cor.X.glm.prob sold.fctr.predict.Low.cor.X.glm
## 472 0.8534160 Y
## 841 0.9200228 Y
## 955 0.9030208 Y
## 199 0.9527908 Y
## 296 0.8569773 Y
## 416 0.9193082 Y
## sold.fctr.predict.All.X.glm.prob sold.fctr.predict.All.X.glm
## 472 0.8624339 Y
## 841 0.8355489 Y
## 955 0.9015627 Y
## 199 0.9260455 Y
## 296 0.8609205 Y
## 416 0.9250955 Y
## sold.fctr.predict.All.Interact.X.glm.prob
## 472 1.000000e+00
## 841 1.000000e+00
## 955 1.000000e+00
## 199 2.220446e-16
## 296 1.000000e+00
## 416 1.000000e+00
## sold.fctr.predict.All.Interact.X.glm
## 472 Y
## 841 Y
## 955 Y
## 199 N
## 296 Y
## 416 Y
## sold.fctr.predict.Ensemble.glmnet.prob
## 472 0.9088360
## 841 0.9175190
## 955 0.9179147
## 199 0.9183720
## 296 0.9215689
## 416 0.9255775
## sold.fctr.predict.Ensemble.glmnet
## 472 Y
## 841 Y
## 955 Y
## 199 Y
## 296 Y
## 416 Y
## sold.fctr.predict.Ensemble.glmnet.accurate
## 472 FALSE
## 841 FALSE
## 955 FALSE
## 199 FALSE
## 296 FALSE
## 416 FALSE
## sold.fctr.predict.Ensemble.glmnet.error
## 472 0.4088360
## 841 0.4175190
## 955 0.4179147
## 199 0.4183720
## 296 0.4215689
## 416 0.4255775
# Export the id column plus every column whose name contains the response
# variable name. The file name is the output prefix followed by the selected
# model id, with dots replaced by underscores, and the suffix "_OOBobs.csv".
write.csv(
    x=glb_OOBobs_df[, c(glb_id_var,
                        names(glb_OOBobs_df)[grepl(glb_rsp_var, names(glb_OOBobs_df),
                                                   fixed=TRUE)])],
    file=paste0(gsub(".", "_", paste0(glb_out_pfx, glb_sel_mdl_id), fixed=TRUE),
                "_OOBobs.csv"),
    row.names=FALSE)

# Record a new minor step of the "fit.models" chunk
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.models", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 12 fit.models 7 2 236.655 268.982 32.327
## 13 fit.models 7 3 268.982 NA NA
# if (sum(is.na(glb_allobs_df$D.P.http)) > 0)
# stop("fit.models_3: Why is this happening ?")
#stop(here"); glb_to_sav()
# Copy columns that exist only in the Fit / OOB data frames back into the
# training (glb_trnobs_df) and all-observations (glb_allobs_df) data frames.
#
# Takes no arguments; reads and updates globals via `<<-`. Rows are matched
# through the `.lcn` column ("Fit" / "OOB"), so the Fit / OOB data frames are
# assumed to be in the same row order as the corresponding `.lcn` rows of the
# target - TODO confirm against the code that builds them.
#
# OOB columns are copied into glb_trnobs_df only when every response value in
# glb_newobs_df is NA; they are always copied into glb_allobs_df.
sync_glb_obs_df <- function() {
    # Merge or cbind ?
    # Work out every set of missing columns BEFORE modifying the targets.
    # Computing the OOB set after the Fit loop (as was done previously) left
    # out any column present in both glb_fitobs_df and glb_OOBobs_df, because
    # the Fit loop had just added it to the target; OOB rows then stayed NA.
    trn_fit_cols <- setdiff(names(glb_fitobs_df), names(glb_trnobs_df))
    trn_OOB_cols <- setdiff(names(glb_OOBobs_df), names(glb_trnobs_df))
    all_fit_cols <- setdiff(names(glb_fitobs_df), names(glb_allobs_df))
    all_OOB_cols <- setdiff(names(glb_OOBobs_df), names(glb_allobs_df))

    for (col in trn_fit_cols)
        glb_trnobs_df[glb_trnobs_df$.lcn == "Fit", col] <<- glb_fitobs_df[, col]
    for (col in all_fit_cols)
        glb_allobs_df[glb_allobs_df$.lcn == "Fit", col] <<- glb_fitobs_df[, col]

    # This guard covers the training data frame only
    if (all(is.na(glb_newobs_df[, glb_rsp_var]))) {
        for (col in trn_OOB_cols)
            glb_trnobs_df[glb_trnobs_df$.lcn == "OOB", col] <<- glb_OOBobs_df[, col]
    }
    for (col in all_OOB_cols)
        glb_allobs_df[glb_allobs_df$.lcn == "OOB", col] <<- glb_OOBobs_df[, col]
}
# Push the Fit / OOB-only columns back into glb_trnobs_df and glb_allobs_df
sync_glb_obs_df()
# Check: list any glb_newobs_df columns that are missing from glb_allobs_df
print(setdiff(names(glb_newobs_df), names(glb_allobs_df)))
## character(0)
# Optionally persist the selected-model state to "<glb_out_pfx>selmdl_dsk.RData"
if (glb_save_envir) {
    save(glb_feats_df,
         glb_allobs_df, #glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
         glb_models_df, dsp_models_df, glb_models_lst, glb_sel_mdl, glb_sel_mdl_id,
         glb_model_type,
         file=paste0(glb_out_pfx, "selmdl_dsk.RData"))
}
#load(paste0(glb_out_pfx, "selmdl_dsk.RData"))

# Drop the last model-fit result
rm(ret_lst)

# Mark "model.selected" as available, then replay the workflow with it
glb_analytics_avl_objs <- c(glb_analytics_avl_objs, "model.selected")
replay.petrisim(pn=glb_analytics_pn,
                replay.trans=glb_analytics_avl_objs,
                flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
# Start the "fit.data.training" chunk as a new major step in the chunk log
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 13 fit.models 7 3 268.982 276.637 7.655
## 14 fit.data.training 8 0 276.638 NA NA
8.0: fit data training
#load(paste0(glb_inp_pfx, "dsk.RData"))
# Fit the final model on the full training set (glb_trnobs_df), unless
# glb_fin_mdl_id already names a model in glb_models_lst.
#
# Reads : glb_sel_mdl, glb_sel_mdl_id, glb_models_df, glb_models_lst,
#         glb_trnobs_df, glb_newobs_df and the glb_* fitting settings.
# Writes: glb_fin_mdl, glb_fin_mdl_id; when the selected model id contains
#         "Ensemble", glb_trnobs_df and glb_newobs_df are also reassigned with
#         the output of glb_get_predictions() for each refit component model.
if (!is.null(glb_fin_mdl_id) && (glb_fin_mdl_id %in% names(glb_models_lst))) {
    warning("Final model same as user selected model")
    glb_fin_mdl <- glb_sel_mdl
} else {
    #stop(here"); glb_to_sav()

    # trim() is called in both feature-parsing paths below. gdata (which
    # exports trim) used to be attached only just before the final fit, i.e.
    # after those calls; attach it ahead of the first use instead.
    require(gdata)

    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(varImp(glb_sel_mdl)$importance, Overall > 5)

        # Fit selected models on glb_trnobs_df
        # Component model ids are recovered from the importance row names by
        # stripping the glb_rsp_var_out prefix and the ".prob" suffix.
        for (mdl_id in gsub(".prob", "",
                            gsub(glb_rsp_var_out, "", row.names(mdlimp_df), fixed=TRUE),
                            fixed=TRUE)) {
            # Last dot-separated component is the method; "Train" is inserted
            # before it to name the refit model.
            mdl_id_components <- unlist(strsplit(mdl_id, "[.]"))
            ret_lst <-
                myfit_mdl(model_id=paste0(c(head(mdl_id_components, -1), "Train"),
                                          collapse="."),
                          model_method=tail(mdl_id_components, 1),
                          indep_vars_vctr=trim(unlist(strsplit(
                              glb_models_df[glb_models_df$model_id == mdl_id, "feats"], "[,]"))),
                          model_type=glb_model_type,
                          rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                          fit_df=glb_trnobs_df, OOB_df=NULL,
                          n_cv_folds=glb_n_cv_folds, tune_models_df=glb_tune_models_df)

            # Score both data sets with the model that was just appended to
            # glb_models_df, defaulting the threshold to the original
            # component model's OOB-optimal value.
            glb_trnobs_df <- glb_get_predictions(df=glb_trnobs_df,
                                mdl_id=tail(glb_models_df$model_id, 1),
                                rsp_var_out=glb_rsp_var_out,
                                prob_threshold_def=subset(glb_models_df,
                                    model_id == mdl_id)$opt.prob.threshold.OOB)
            glb_newobs_df <- glb_get_predictions(df=glb_newobs_df,
                                mdl_id=tail(glb_models_df$model_id, 1),
                                rsp_var_out=glb_rsp_var_out,
                                prob_threshold_def=subset(glb_models_df,
                                    model_id == mdl_id)$opt.prob.threshold.OOB)
        }
    }

    # "Final" model
    if ((model_method <- glb_sel_mdl$method) == "custom")
        # get actual method from the model_id
        model_method <- tail(unlist(strsplit(glb_sel_mdl_id, "[.]")), 1)

    if (grepl("Ensemble", glb_sel_mdl_id)) {
        # Find which models are relevant
        mdlimp_df <- subset(varImp(glb_sel_mdl)$importance, Overall > 5)
        # Point the ensemble's inputs at the ".Train." refits created above.
        if (glb_is_classification && glb_is_binomial)
            indep_vars_vctr <- gsub("(.*)\\.(.*)\\.prob", "\\1\\.Train\\.\\2\\.prob",
                                    row.names(mdlimp_df)) else
            indep_vars_vctr <- gsub("(.*)\\.(.*)", "\\1\\.Train\\.\\2",
                                    row.names(mdlimp_df))
    } else indep_vars_vctr <-
        trim(unlist(strsplit(glb_models_df[glb_models_df$model_id == glb_sel_mdl_id
                                           , "feats"], "[,]")))

    # Discontinuing use of tune_finmdl_df;
    #   since final model needs to be cved on glb_trnobs_df
    # (the table is still built here but is not passed to myfit_mdl below)
    tune_finmdl_df <- NULL
    if (nrow(glb_sel_mdl$bestTune) > 0) {
        for (param in names(glb_sel_mdl$bestTune)) {
            #print(sprintf("param: %s", param))
            if (glb_sel_mdl$bestTune[1, param] != "none")
                tune_finmdl_df <- rbind(tune_finmdl_df,
                    data.frame(parameter=param,
                               min=glb_sel_mdl$bestTune[1, param],
                               max=glb_sel_mdl$bestTune[1, param],
                               by=1)) # by val does not matter
        }
    }

    # Sync with parameters in mydsutils.R
    ret_lst <- myfit_mdl(model_id="Final", model_method=model_method,
                         indep_vars_vctr=indep_vars_vctr,
                         model_type=glb_model_type,
                         rsp_var=glb_rsp_var, rsp_var_out=glb_rsp_var_out,
                         fit_df=glb_trnobs_df, OOB_df=NULL,
                         n_cv_folds=glb_n_cv_folds,
                         tune_models_df=glb_tune_models_df, #tune_finmdl_df,
                         # Automate from here
                         # Issues if glb_sel_mdl$method == "rf" b/c trainControl is "oob"; not "cv"
                         model_loss_mtrx=glb_model_metric_terms,
                         model_summaryFunction=glb_sel_mdl$control$summaryFunction,
                         model_metric=glb_sel_mdl$metric,
                         model_metric_maximize=glb_sel_mdl$maximize)

    # The final model is taken to be the last entry of glb_models_lst, and its
    # id the glb_models_df row at that same position.
    glb_fin_mdl <- glb_models_lst[[length(glb_models_lst)]]
    glb_fin_mdl_id <- glb_models_df[length(glb_models_lst), "model_id"]
}
## [1] "fitting model: All.Interact.X.no.rnorm.Train.rf"
## [1] " indep_vars: D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 129 on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 1859 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 3718 matrix numeric
## oob.times 1859 -none- numeric
## classes 2 -none- character
## importance 257 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 1859 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 257 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6325855
## 2 0.1 0.8557214
## 3 0.2 0.9513274
## 4 0.3 0.9834191
## 5 0.4 1.0000000
## 6 0.5 1.0000000
## 7 0.6 0.9982528
## 8 0.7 0.9644792
## 9 0.8 0.8974359
## 10 0.9 0.8227242
## 11 1.0 0.3284742
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.N
## 1 N 999
## 2 Y NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.Y
## 1 NA
## 2 860
## Prediction
## Reference N Y
## N 999 0
## Y 0 860
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9980176 1.0000000 0.5373857
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## model_id model_method
## 1 All.Interact.X.no.rnorm.Train.rf rf
## feats
## 1 D.ratio.nstopwrds.nwrds, D.terms.n.stem.stop.Ratio, D.npnct01.log, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.npnct12.log, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, prdl.my.descr.fctr*idseq.my, prdl.my.descr.fctr*D.ratio.sum.TfIdf.nwrds, prdl.my.descr.fctr*D.TfIdf.sum.stem.stop.Ratio, prdl.my.descr.fctr*D.npnct15.log, prdl.my.descr.fctr*D.npnct03.log, prdl.my.descr.fctr*D.nwrds.log, prdl.my.descr.fctr*D.nchrs.log, startprice.diff*biddable, cellular.fctr*carrier.fctr, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 83.205 30.486
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.5 1 0.838628
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9980176 1 0.673513
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01046481 0.02054501
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.6
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.6
## [1] "fitting model: All.X.no.rnorm.Train.rf"
## [1] " indep_vars: biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr"
## Aggregating results
## Selecting tuning parameters
## Fitting mtry = 77 on full training set
## Length Class Mode
## call 4 -none- call
## type 1 -none- character
## predicted 1859 factor numeric
## err.rate 1500 -none- numeric
## confusion 6 -none- numeric
## votes 3718 matrix numeric
## oob.times 1859 -none- numeric
## classes 2 -none- character
## importance 153 -none- numeric
## importanceSD 0 -none- NULL
## localImportance 0 -none- NULL
## proximity 0 -none- NULL
## ntree 1 -none- numeric
## mtry 1 -none- numeric
## forest 14 -none- list
## y 1859 factor numeric
## test 0 -none- NULL
## inbag 0 -none- NULL
## xNames 153 -none- character
## problemType 1 -none- character
## tuneValue 1 data.frame list
## obsLevels 2 -none- character
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6325855
## 2 0.1 0.8535980
## 3 0.2 0.9534368
## 4 0.3 0.9839817
## 5 0.4 0.9982589
## 6 0.5 1.0000000
## 7 0.6 0.9994183
## 8 0.7 0.9644792
## 9 0.8 0.8988476
## 10 0.9 0.8283379
## 11 1.0 0.2340862
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.All.X.no.rnorm.Train.rf.N
## 1 N 999
## 2 Y NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf.Y
## 1 NA
## 2 860
## Prediction
## Reference N Y
## N 999 0
## Y 0 860
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9980176 1.0000000 0.5373857
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## model_id model_method
## 1 All.X.no.rnorm.Train.rf rf
## feats
## 1 biddable, D.ratio.nstopwrds.nwrds, D.npnct15.log, D.npnct03.log, D.terms.n.stem.stop.Ratio, D.ratio.sum.TfIdf.nwrds, D.npnct01.log, D.TfIdf.sum.stem.stop.Ratio, storage.fctr, D.npnct11.log, D.npnct10.log, D.TfIdf.sum.post.stop, D.TfIdf.sum.post.stem, D.sum.TfIdf, prdl.my.descr.fctr, D.npnct13.log, color.fctr, D.npnct08.log, D.npnct16.log, D.npnct24.log, D.nstopwrds.log, D.npnct06.log, D.npnct28.log, D.nuppr.log, D.nchrs.log, D.nwrds.log, D.npnct12.log, carrier.fctr, D.npnct09.log, D.ndgts.log, D.nwrds.unq.log, D.terms.n.post.stem.log, D.terms.n.post.stop.log, cellular.fctr, D.npnct14.log, D.terms.n.post.stem, D.terms.n.post.stop, D.npnct05.log, condition.fctr, idseq.my, startprice.diff, prdl.my.descr.fctr:.clusterid.fctr
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 45.755 16.036
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.5 1 0.8413179
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9980176 1 0.6787273
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.008745356 0.01827901
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.5
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.5
## [1] "fitting model: Max.cor.Y.cv.0.cp.0.Train.rpart"
## [1] " indep_vars: biddable, startprice.diff"
## Aggregating results
## Selecting tuning parameters
## Fitting cp = 0.00174 on full training set
## Warning in myfit_mdl(model_id = paste0(c(head(mdl_id_components, -1),
## "Train"), : model's bestTune found at an extreme of tuneGrid for parameter:
## cp
## Call:
## rpart(formula = .outcome ~ ., control = list(minsplit = 20, minbucket = 7,
## cp = 0, maxcompete = 4, maxsurrogate = 5, usesurrogate = 2,
## surrogatestyle = 0, maxdepth = 30, xval = 0))
## n= 1859
##
## CP nsplit rel error
## 1 0.515116279 0 1.0000000
## 2 0.147674419 1 0.4848837
## 3 0.001744186 2 0.3372093
##
## Variable importance
## biddable startprice.diff
## 61 39
##
## Node number 1: 1859 observations, complexity param=0.5151163
## predicted class=N expected loss=0.4626143 P(node) =1
## class counts: 999 860
## probabilities: 0.537 0.463
## left son=2 (1022 obs) right son=3 (837 obs)
## Primary splits:
## biddable < 0.5 to the left, improve=277.7532, (0 missing)
## startprice.diff < 41.5325 to the right, improve=181.7181, (0 missing)
## Surrogate splits:
## startprice.diff < 250.1071 to the left, agree=0.557, adj=0.016, (0 split)
##
## Node number 2: 1022 observations
## predicted class=N expected loss=0.2152642 P(node) =0.5497579
## class counts: 802 220
## probabilities: 0.785 0.215
##
## Node number 3: 837 observations, complexity param=0.1476744
## predicted class=Y expected loss=0.2353644 P(node) =0.4502421
## class counts: 197 640
## probabilities: 0.235 0.765
## left son=6 (167 obs) right son=7 (670 obs)
## Primary splits:
## startprice.diff < 59.22341 to the right, improve=173.5195, (0 missing)
##
## Node number 6: 167 observations
## predicted class=N expected loss=0.1197605 P(node) =0.08983324
## class counts: 147 20
## probabilities: 0.880 0.120
##
## Node number 7: 670 observations
## predicted class=Y expected loss=0.07462687 P(node) =0.3604088
## class counts: 50 620
## probabilities: 0.075 0.925
##
## n= 1859
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 1859 860 N (0.53738569 0.46261431)
## 2) biddable< 0.5 1022 220 N (0.78473581 0.21526419) *
## 3) biddable>=0.5 837 197 Y (0.23536440 0.76463560)
## 6) startprice.diff>=59.22341 167 20 N (0.88023952 0.11976048) *
## 7) startprice.diff< 59.22341 670 50 Y (0.07462687 0.92537313) *
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6325855
## 2 0.1 0.6325855
## 3 0.2 0.6583072
## 4 0.3 0.8104575
## 5 0.4 0.8104575
## 6 0.5 0.8104575
## 7 0.6 0.8104575
## 8 0.7 0.8104575
## 9 0.8 0.8104575
## 10 0.9 0.8104575
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.9000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.N
## 1 N 949
## 2 Y 240
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.Y
## 1 50
## 2 620
## Prediction
## Reference N Y
## N 949 50
## Y 240 620
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 8.440022e-01 6.813526e-01 8.266980e-01 8.602130e-01 5.373857e-01
## AccuracyPValue McnemarPValue
## 6.399625e-173 1.276209e-28
## model_id model_method feats
## 1 Max.cor.Y.cv.0.cp.0.Train.rpart rpart biddable, startprice.diff
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 3 1.082 0.015
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 0.8449263 0.9 0.8104575 0.8267914
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.826698 0.860213 0.6496665
## max.AccuracySD.fit max.KappaSD.fit
## 1 0.01674491 0.03182114
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.3
## Warning in glb_get_predictions(df = glb_newobs_df, mdl_id =
## tail(glb_models_df$model_id, : Using default probability threshold: 0.3
## [1] "fitting model: Final.glmnet"
## [1] " indep_vars: sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob, sold.fctr.predict.All.X.no.rnorm.Train.rf.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob"
## Aggregating results
## Selecting tuning parameters
## Fitting alpha = 0.1, lambda = 0.0972 on full training set
## Warning in myfit_mdl(model_id = "Final", model_method = model_method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: alpha
## Warning in myfit_mdl(model_id = "Final", model_method = model_method,
## indep_vars_vctr = indep_vars_vctr, : model's bestTune found at an extreme
## of tuneGrid for parameter: lambda
## Length Class Mode
## a0 100 -none- numeric
## beta 300 dgCMatrix S4
## df 100 -none- numeric
## dim 2 -none- numeric
## lambda 100 -none- numeric
## dev.ratio 100 -none- numeric
## nulldev 1 -none- numeric
## npasses 1 -none- numeric
## jerr 1 -none- numeric
## offset 1 -none- logical
## classnames 2 -none- character
## call 5 -none- call
## nobs 1 -none- numeric
## lambdaOpt 1 -none- numeric
## xNames 3 -none- character
## problemType 1 -none- character
## tuneValue 2 data.frame list
## obsLevels 2 -none- character
## [1] "min lambda > lambdaOpt:"
## (Intercept)
## -2.7019114
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 2.3329814
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 2.3451046
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## 0.7356721
## [1] "max lambda < lambdaOpt:"
## (Intercept)
## -7.757483
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 8.899955
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 9.018704
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## -2.293058
## character(0)
## character(0)
## [1] " calling mypredict_mdl for fit:"
## threshold f.score
## 1 0.0 0.6325855
## 2 0.1 0.8194378
## 3 0.2 0.9630459
## 4 0.3 0.9783845
## 5 0.4 0.9907834
## 6 0.5 1.0000000
## 7 0.6 0.9953271
## 8 0.7 0.9486553
## 9 0.8 0.8780170
## 10 0.9 0.8088643
## 11 1.0 0.0000000
## [1] "Classifier Probability Threshold: 0.5000 to maximize f.score.fit"
## sold.fctr sold.fctr.predict.Final.glmnet.N
## 1 N 999
## 2 Y NA
## sold.fctr.predict.Final.glmnet.Y
## 1 NA
## 2 860
## Prediction
## Reference N Y
## N 999 0
## Y 0 860
## Accuracy Kappa AccuracyLower AccuracyUpper AccuracyNull
## 1.0000000 1.0000000 0.9980176 1.0000000 0.5373857
## AccuracyPValue McnemarPValue
## 0.0000000 NaN
## Warning in mypredict_mdl(mdl, df = fit_df, rsp_var, rsp_var_out,
## model_id_method, : Expecting 1 metric: Accuracy; recd: Accuracy, Kappa;
## retaining Accuracy only
## model_id model_method
## 1 Final.glmnet glmnet
## feats
## 1 sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob, sold.fctr.predict.All.X.no.rnorm.Train.rf.prob, sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## max.nTuningRuns min.elapsedtime.everything min.elapsedtime.final
## 1 9 1.457 0.041
## max.auc.fit opt.prob.threshold.fit max.f.score.fit max.Accuracy.fit
## 1 1 0.5 1 1
## max.AccuracyLower.fit max.AccuracyUpper.fit max.Kappa.fit
## 1 0.9980176 1 1
## max.AccuracySD.fit max.KappaSD.fit
## 1 0 0
# Discard the fit result holder, then open the next minor step of
# "fit.data.training" (major.inc=FALSE; shown as step 8.1 in the table below).
rm(ret_lst)
glb_chunks_df <- myadd_chunk(glb_chunks_df, "fit.data.training", major.inc=FALSE)
## label step_major step_minor bgn end elapsed
## 14 fit.data.training 8 0 276.638 419.196 142.558
## 15 fit.data.training 8 1 419.196 NA NA
#```
#```{r fit.data.training_1, cache=FALSE}
#stop(here"); glb_to_sav()
# Score the training observations with the final model.
# For binomial classification the default probability threshold passed in is
# the selected model's opt.prob.threshold.OOB; otherwise NULL is passed.
# A scalar if/else is used instead of ifelse(): ifelse() cannot return NULL and
# stops with "replacement has length zero" when the chosen branch is NULL,
# which is what happened here for every non-binomial run.
glb_trnobs_df <- glb_get_predictions(df=glb_trnobs_df, mdl_id=glb_fin_mdl_id,
    rsp_var_out=glb_rsp_var_out,
    prob_threshold_def=if (glb_is_classification && glb_is_binomial)
        glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                      "opt.prob.threshold.OOB"] else NULL)
## Warning in glb_get_predictions(df = glb_trnobs_df, mdl_id =
## glb_fin_mdl_id, : Using default probability threshold: 0.5
# Recompute the importance table from the final model, then keep a copy of its
# "importance" column under "<final model id>.importance" and show the result.
glb_featsimp_df <- myget_feats_importance(featsimp_df=glb_featsimp_df,
                                          mdl=glb_fin_mdl)
glb_featsimp_df[[paste0(glb_fin_mdl_id, ".importance")]] <-
    glb_featsimp_df[["importance"]]
print(glb_featsimp_df)
## Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob 4.587832
## sold.fctr.predict.All.Interact.X.glm.prob 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob 0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.138116
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.X.bayesglm.prob 0.000000
## sold.fctr.predict.All.X.glm.prob 0.000000
## sold.fctr.predict.All.X.glmnet.prob 0.000000
## sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob 0.000000
## sold.fctr.predict.Low.cor.X.glm.prob 0.000000
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 7.031892
## sold.fctr.predict.Max.cor.Y.glm.prob 0.000000
## sold.fctr.predict.Max.cor.Y.rpart.prob 0.000000
## importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 0.00000
## sold.fctr.predict.All.Interact.X.bayesglm.prob NA
## sold.fctr.predict.All.Interact.X.glm.prob NA
## sold.fctr.predict.All.Interact.X.glmnet.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.All.X.bayesglm.prob NA
## sold.fctr.predict.All.X.glm.prob NA
## sold.fctr.predict.All.X.glmnet.prob NA
## sold.fctr.predict.All.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob NA
## sold.fctr.predict.Low.cor.X.glm.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob NA
## sold.fctr.predict.Max.cor.Y.glm.prob NA
## sold.fctr.predict.Max.cor.Y.rpart.prob NA
## Final.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 0.00000
## sold.fctr.predict.All.Interact.X.bayesglm.prob NA
## sold.fctr.predict.All.Interact.X.glm.prob NA
## sold.fctr.predict.All.Interact.X.glmnet.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.All.X.bayesglm.prob NA
## sold.fctr.predict.All.X.glm.prob NA
## sold.fctr.predict.All.X.glmnet.prob NA
## sold.fctr.predict.All.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob NA
## sold.fctr.predict.Low.cor.X.glm.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob NA
## sold.fctr.predict.Max.cor.Y.glm.prob NA
## sold.fctr.predict.Max.cor.Y.rpart.prob NA
# Diagnostic plots for the final model on the training observations.
# Only the binomial-classification case passes a probability threshold (the
# selected model's opt.prob.threshold.OOB); every other case omits it.
if (!(glb_is_classification && glb_is_binomial)) {
    glb_analytics_diag_plots(obs_df=glb_trnobs_df, mdl_id=glb_fin_mdl_id)
} else {
    glb_analytics_diag_plots(obs_df=glb_trnobs_df, mdl_id=glb_fin_mdl_id,
        prob_threshold=glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.OOB"])
}
## [1] "Min/Max Boundaries: "
## UniqueID sold.fctr sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## 1 10001 N 0.228
## 1859 11861 N NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.X.no.rnorm.rf.prob
## 1 0.246
## 1859 NA
## sold.fctr.predict.All.X.no.rnorm.rf
## 1 N
## 1859 <NA>
## sold.fctr.predict.Max.cor.Y.rpart.prob
## 1 0.2115028
## 1859 NA
## sold.fctr.predict.Max.cor.Y.rpart
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.X.no.rnorm.rpart.prob
## 1 0.2115028
## 1859 NA
## sold.fctr.predict.All.X.no.rnorm.rpart
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## 1 0.2115028
## 1859 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.Interact.X.glmnet.prob
## 1 0.2764637
## 1859 NA
## sold.fctr.predict.All.Interact.X.glmnet
## 1 N
## 1859 <NA>
## sold.fctr.predict.Max.cor.Y.glm.prob sold.fctr.predict.Max.cor.Y.glm
## 1 0.2015561 N
## 1859 NA <NA>
## sold.fctr.predict.All.X.glmnet.prob sold.fctr.predict.All.X.glmnet
## 1 0.2772074 N
## 1859 NA <NA>
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## 1 0.1428571
## 1859 NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.Interact.X.bayesglm.prob
## 1 0.299625
## 1859 NA
## sold.fctr.predict.All.Interact.X.bayesglm
## 1 N
## 1859 <NA>
## sold.fctr.predict.Interact.High.cor.Y.glm.prob
## 1 0.2022016
## 1859 NA
## sold.fctr.predict.Interact.High.cor.Y.glm
## 1 N
## 1859 <NA>
## sold.fctr.predict.All.X.bayesglm.prob
## 1 0.3417333
## 1859 NA
## sold.fctr.predict.All.X.bayesglm sold.fctr.predict.Low.cor.X.glm.prob
## 1 N 0.4592869
## 1859 <NA> NA
## sold.fctr.predict.Low.cor.X.glm sold.fctr.predict.All.X.glm.prob
## 1 N 0.388993
## 1859 <NA> NA
## sold.fctr.predict.All.X.glm sold.fctr.predict.All.Interact.X.glm.prob
## 1 N 1
## 1859 <NA> NA
## sold.fctr.predict.All.Interact.X.glm
## 1 Y
## 1859 <NA>
## sold.fctr.predict.Ensemble.glmnet.prob
## 1 NA
## 1859 0.6425959
## sold.fctr.predict.Ensemble.glmnet
## 1 <NA>
## 1859 Y
## sold.fctr.predict.Ensemble.glmnet.accurate
## 1 NA
## 1859 TRUE
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 1 0.274
## 1859 0.090
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## 1 N
## 1859 N
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 1 0.272
## 1859 0.078
## sold.fctr.predict.All.X.no.rnorm.Train.rf
## 1 N
## 1859 N
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## 1 0.2152642
## 1859 0.2152642
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## 1 N
## 1859 N
## sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## 1 0.2195853 N
## 1859 0.1039561 N
## sold.fctr.predict.Final.glmnet.accurate
## 1 TRUE
## 1859 TRUE
## sold.fctr.predict.Final.glmnet.error .label
## 1 0 10001
## 1859 0 11861
## [1] "Inaccurate: "
## [1] UniqueID
## [2] sold.fctr
## [3] sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob
## [4] sold.fctr.predict.All.Interact.X.no.rnorm.rf
## [5] sold.fctr.predict.All.X.no.rnorm.rf.prob
## [6] sold.fctr.predict.All.X.no.rnorm.rf
## [7] sold.fctr.predict.Max.cor.Y.rpart.prob
## [8] sold.fctr.predict.Max.cor.Y.rpart
## [9] sold.fctr.predict.All.X.no.rnorm.rpart.prob
## [10] sold.fctr.predict.All.X.no.rnorm.rpart
## [11] sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob
## [12] sold.fctr.predict.All.Interact.X.no.rnorm.rpart
## [13] sold.fctr.predict.All.Interact.X.glmnet.prob
## [14] sold.fctr.predict.All.Interact.X.glmnet
## [15] sold.fctr.predict.Max.cor.Y.glm.prob
## [16] sold.fctr.predict.Max.cor.Y.glm
## [17] sold.fctr.predict.All.X.glmnet.prob
## [18] sold.fctr.predict.All.X.glmnet
## [19] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob
## [20] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart
## [21] sold.fctr.predict.All.Interact.X.bayesglm.prob
## [22] sold.fctr.predict.All.Interact.X.bayesglm
## [23] sold.fctr.predict.Interact.High.cor.Y.glm.prob
## [24] sold.fctr.predict.Interact.High.cor.Y.glm
## [25] sold.fctr.predict.All.X.bayesglm.prob
## [26] sold.fctr.predict.All.X.bayesglm
## [27] sold.fctr.predict.Low.cor.X.glm.prob
## [28] sold.fctr.predict.Low.cor.X.glm
## [29] sold.fctr.predict.All.X.glm.prob
## [30] sold.fctr.predict.All.X.glm
## [31] sold.fctr.predict.All.Interact.X.glm.prob
## [32] sold.fctr.predict.All.Interact.X.glm
## [33] sold.fctr.predict.Ensemble.glmnet.prob
## [34] sold.fctr.predict.Ensemble.glmnet
## [35] sold.fctr.predict.Ensemble.glmnet.accurate
## [36] sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## [37] sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## [38] sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## [39] sold.fctr.predict.All.X.no.rnorm.Train.rf
## [40] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## [41] sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## [42] sold.fctr.predict.Final.glmnet.prob
## [43] sold.fctr.predict.Final.glmnet
## [44] sold.fctr.predict.Final.glmnet.accurate
## [45] sold.fctr.predict.Final.glmnet.error
## <0 rows> (or 0-length row.names)
# Gather the ids of all features that have a non-NA value in at least one
# column of glb_feats_df whose name contains ".importance".
dsp_feats_vctr <- NULL
for (var in names(glb_feats_df)[grepl(".importance", names(glb_feats_df), fixed=TRUE)]) {
    dsp_feats_vctr <- union(dsp_feats_vctr,
                            glb_feats_df[which(!is.na(glb_feats_df[, var])), "id"])
}

# print(glb_trnobs_df[glb_trnobs_df$UniqueID %in% FN_OOB_ids,
#                     grep(glb_rsp_var, names(glb_trnobs_df), value=TRUE)])

# Columns that exist in glb_trnobs_df but have not reached glb_allobs_df yet.
print(setdiff(names(glb_trnobs_df), names(glb_allobs_df)))
## [1] "sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob"
## [2] "sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf"
## [3] "sold.fctr.predict.All.X.no.rnorm.Train.rf.prob"
## [4] "sold.fctr.predict.All.X.no.rnorm.Train.rf"
## [5] "sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob"
## [6] "sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart"
## [7] "sold.fctr.predict.Final.glmnet.prob"
## [8] "sold.fctr.predict.Final.glmnet"
# Copy every column that exists only in glb_trnobs_df into the rows of
# glb_allobs_df whose .src is "Train".
# NOTE(review): the assignment is positional -- it presumes glb_trnobs_df has
# exactly the .src == "Train" rows of glb_allobs_df, in the same order; confirm.
for (col in setdiff(names(glb_trnobs_df), names(glb_allobs_df)))
# Merge or cbind ?
glb_allobs_df[glb_allobs_df$.src == "Train", col] <- glb_trnobs_df[, col]
# Report columns of glb_fitobs_df that glb_allobs_df still lacks.
print(setdiff(names(glb_fitobs_df), names(glb_allobs_df)))
## character(0)
# Report columns of glb_OOBobs_df that glb_allobs_df still lacks.
print(setdiff(names(glb_OOBobs_df), names(glb_allobs_df)))
## character(0)
# Same positional copy for OOB-only columns, into the rows whose .lcn is "OOB".
# NOTE(review): same row-order assumption as for the "Train" copy; confirm.
for (col in setdiff(names(glb_OOBobs_df), names(glb_allobs_df)))
# Merge or cbind ?
glb_allobs_df[glb_allobs_df$.lcn == "OOB", col] <- glb_OOBobs_df[, col]
# Report columns of glb_newobs_df that glb_allobs_df still lacks.
print(setdiff(names(glb_newobs_df), names(glb_allobs_df)))
## character(0)
# Optional checkpoint: when glb_save_envir is set, write the feature table,
# the combined observations, the model tables/list, and both the selected and
# the final model to "<glb_out_pfx>dsk.RData".
if (glb_save_envir) {
    save(list=c("glb_feats_df", "glb_allobs_df",
                #glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
                "glb_models_df", "dsp_models_df", "glb_models_lst", "glb_model_type",
                "glb_sel_mdl", "glb_sel_mdl_id",
                "glb_fin_mdl", "glb_fin_mdl_id"),
         file=paste0(glb_out_pfx, "dsk.RData"))
}

# Append "data.training.all.prediction" and "model.final" to the list of
# available objects and replay the workflow net with that updated list.
replay.petrisim(pn=glb_analytics_pn,
                replay.trans=(glb_analytics_avl_objs <-
                                  c(glb_analytics_avl_objs,
                                    "data.training.all.prediction", "model.final")),
                flip_coord=TRUE)
## time trans "bgn " "fit.data.training.all " "predict.data.new " "end "
## 0.0000 multiple enabled transitions: data.training.all data.new model.selected firing: data.training.all
## 1.0000 1 2 1 0 0
## 1.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction firing: data.new
## 2.0000 2 1 1 1 0
## 2.0000 multiple enabled transitions: data.training.all data.new model.selected model.final data.training.all.prediction data.new.prediction firing: model.selected
## 3.0000 3 0 2 1 0
## 3.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: data.training.all.prediction
## 4.0000 5 0 1 1 1
## 4.0000 multiple enabled transitions: model.final data.training.all.prediction data.new.prediction firing: model.final
## 5.0000 4 0 0 2 1
glb_chunks_df <- myadd_chunk(glb_chunks_df, "predict.data.new", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 15 fit.data.training 8 1 419.196 425.621 6.426
## 16 predict.data.new 9 0 425.622 NA NA
9.0: predict data new
# Compute final model predictions
# sp_ only
# rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)
# tmp_trnobs_df <- glb_get_predictions(glb_trnobs_df, mdl_id=glb_fin_mdl_id,
# rsp_var_out=glb_rsp_var_out,
# prob_threshold_def=ifelse(glb_is_classification && glb_is_binomial,
# glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
# "opt.prob.threshold.OOB"], NULL))
# tmp_newobs_df <- glb_get_predictions(glb_newobs_df, mdl_id=glb_fin_mdl_id,
# rsp_var_out=glb_rsp_var_out,
# prob_threshold_def=ifelse(glb_is_classification && glb_is_binomial,
# glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
# "opt.prob.threshold.OOB"], NULL))
#
# tmp_allobs_df <- orderBy(~UniqueID,
# rbind(tmp_trnobs_df[, c(glb_id_var, glb_rsp_var, rsp_var_out)],
# tmp_newobs_df[, c(glb_id_var, glb_rsp_var, rsp_var_out)]))
# names(tmp_allobs_df)[3] <- glb_rsp_var_out
# write.csv(tmp_allobs_df, paste0(glb_out_pfx, "predict.csv"), row.names=FALSE)
###
# Score the new observations with the final model. For binomial
# classification the probability threshold tuned on OOB data for the
# *selected* model is passed as the default threshold; otherwise NULL.
#
# `if ... else` is used instead of ifelse(): ifelse(FALSE, x, NULL) fails
# with "replacement has length zero", so the old form could only ever run in
# the binomial case. The threshold is now passed exactly as it is to
# glb_analytics_diag_plots() in the next statement.
glb_newobs_df <- glb_get_predictions(glb_newobs_df, mdl_id=glb_fin_mdl_id,
    rsp_var_out=glb_rsp_var_out,
    prob_threshold_def=if (glb_is_classification && glb_is_binomial)
        glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                      "opt.prob.threshold.OOB"] else
        NULL)
## Warning in glb_get_predictions(glb_newobs_df, mdl_id = glb_fin_mdl_id,
## rsp_var_out = glb_rsp_var_out, : Using default probability threshold: 0.5
# Diagnostic plots of the final model's predictions on the new observations.
# Only the binomial-classification case supplies a probability threshold
# (the OOB-optimal threshold recorded for the selected model).
if (!(glb_is_classification && glb_is_binomial)) {
    glb_analytics_diag_plots(obs_df=glb_newobs_df, mdl_id=glb_fin_mdl_id)
} else {
    glb_analytics_diag_plots(obs_df=glb_newobs_df, mdl_id=glb_fin_mdl_id,
        prob_threshold=glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
                                     "opt.prob.threshold.OOB"])
}
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf
## Warning: Removed 798 rows containing missing values (geom_point).
## Warning: Removed 798 rows containing missing values (geom_point).
## [1] "Min/Max Boundaries: "
## UniqueID sold.fctr
## 1860 11862 <NA>
## 2657 12659 <NA>
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## 1860 0.546
## 2657 0.274
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## 1860 N
## 2657 N
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## 1860 0.480
## 2657 0.198
## sold.fctr.predict.All.X.no.rnorm.Train.rf
## 1860 N
## 2657 N
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## 1860 0.2152642
## 2657 0.2152642
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## 1860 N
## 2657 N
## sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## 1860 0.4641735 N
## 2657 0.1912421 N
## sold.fctr.predict.Final.glmnet.accurate
## 1860 NA
## 2657 NA
## sold.fctr.predict.Final.glmnet.error .label
## 1860 0 11862
## 2657 0 12659
## [1] "Inaccurate: "
## UniqueID sold.fctr
## NA NA <NA>
## NA.1 NA <NA>
## NA.2 NA <NA>
## NA.3 NA <NA>
## NA.4 NA <NA>
## NA.5 NA <NA>
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA NA
## NA.1 NA
## NA.2 NA
## NA.3 NA
## NA.4 NA
## NA.5 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA <NA>
## NA.1 <NA>
## NA.2 <NA>
## NA.3 <NA>
## NA.4 <NA>
## NA.5 <NA>
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA NA
## NA.1 NA
## NA.2 NA
## NA.3 NA
## NA.4 NA
## NA.5 NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA <NA>
## NA.1 <NA>
## NA.2 <NA>
## NA.3 <NA>
## NA.4 <NA>
## NA.5 <NA>
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA NA
## NA.1 NA
## NA.2 NA
## NA.3 NA
## NA.4 NA
## NA.5 NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA <NA>
## NA.1 <NA>
## NA.2 <NA>
## NA.3 <NA>
## NA.4 <NA>
## NA.5 <NA>
## sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA NA <NA>
## NA.1 NA <NA>
## NA.2 NA <NA>
## NA.3 NA <NA>
## NA.4 NA <NA>
## NA.5 NA <NA>
## sold.fctr.predict.Final.glmnet.accurate
## NA NA
## NA.1 NA
## NA.2 NA
## NA.3 NA
## NA.4 NA
## NA.5 NA
## sold.fctr.predict.Final.glmnet.error
## NA NA
## NA.1 NA
## NA.2 NA
## NA.3 NA
## NA.4 NA
## NA.5 NA
## UniqueID sold.fctr
## NA.214 NA <NA>
## NA.252 NA <NA>
## NA.381 NA <NA>
## NA.541 NA <NA>
## NA.542 NA <NA>
## NA.737 NA <NA>
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA.214 NA
## NA.252 NA
## NA.381 NA
## NA.541 NA
## NA.542 NA
## NA.737 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA.214 <NA>
## NA.252 <NA>
## NA.381 <NA>
## NA.541 <NA>
## NA.542 <NA>
## NA.737 <NA>
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA.214 NA
## NA.252 NA
## NA.381 NA
## NA.541 NA
## NA.542 NA
## NA.737 NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA.214 <NA>
## NA.252 <NA>
## NA.381 <NA>
## NA.541 <NA>
## NA.542 <NA>
## NA.737 <NA>
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA.214 NA
## NA.252 NA
## NA.381 NA
## NA.541 NA
## NA.542 NA
## NA.737 NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA.214 <NA>
## NA.252 <NA>
## NA.381 <NA>
## NA.541 <NA>
## NA.542 <NA>
## NA.737 <NA>
## sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA.214 NA <NA>
## NA.252 NA <NA>
## NA.381 NA <NA>
## NA.541 NA <NA>
## NA.542 NA <NA>
## NA.737 NA <NA>
## sold.fctr.predict.Final.glmnet.accurate
## NA.214 NA
## NA.252 NA
## NA.381 NA
## NA.541 NA
## NA.542 NA
## NA.737 NA
## sold.fctr.predict.Final.glmnet.error
## NA.214 NA
## NA.252 NA
## NA.381 NA
## NA.541 NA
## NA.542 NA
## NA.737 NA
## UniqueID sold.fctr
## NA.792 NA <NA>
## NA.793 NA <NA>
## NA.794 NA <NA>
## NA.795 NA <NA>
## NA.796 NA <NA>
## NA.797 NA <NA>
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob
## NA.792 NA
## NA.793 NA
## NA.794 NA
## NA.795 NA
## NA.796 NA
## NA.797 NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf
## NA.792 <NA>
## NA.793 <NA>
## NA.794 <NA>
## NA.795 <NA>
## NA.796 <NA>
## NA.797 <NA>
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob
## NA.792 NA
## NA.793 NA
## NA.794 NA
## NA.795 NA
## NA.796 NA
## NA.797 NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf
## NA.792 <NA>
## NA.793 <NA>
## NA.794 <NA>
## NA.795 <NA>
## NA.796 <NA>
## NA.797 <NA>
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob
## NA.792 NA
## NA.793 NA
## NA.794 NA
## NA.795 NA
## NA.796 NA
## NA.797 NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart
## NA.792 <NA>
## NA.793 <NA>
## NA.794 <NA>
## NA.795 <NA>
## NA.796 <NA>
## NA.797 <NA>
## sold.fctr.predict.Final.glmnet.prob sold.fctr.predict.Final.glmnet
## NA.792 NA <NA>
## NA.793 NA <NA>
## NA.794 NA <NA>
## NA.795 NA <NA>
## NA.796 NA <NA>
## NA.797 NA <NA>
## sold.fctr.predict.Final.glmnet.accurate
## NA.792 NA
## NA.793 NA
## NA.794 NA
## NA.795 NA
## NA.796 NA
## NA.797 NA
## sold.fctr.predict.Final.glmnet.error
## NA.792 NA
## NA.793 NA
## NA.794 NA
## NA.795 NA
## NA.796 NA
## NA.797 NA
## Warning: Removed 798 rows containing missing values (geom_point).
# Build the submission data frame from the final model's predictions on
# glb_newobs_df.
#   * Binomial classification: submit the ".prob" column as "Probability1",
#     after shifting hand-picked observations by 0.5 towards a forced class.
#   * Otherwise: submit the final model's prediction column unchanged.
if (glb_is_classification && glb_is_binomial) {
    submit_df <- glb_newobs_df[, c(glb_id_var,
                        paste0(glb_rsp_var_out, glb_fin_mdl_id, ".prob"))]
    names(submit_df)[2] <- "Probability1"
#     submit_df <- glb_newobs_df[, c(paste0(glb_rsp_var_out, glb_fin_mdl_id)), FALSE]
#     names(submit_df)[1] <- "BDscience"
#     submit_df$BDscience <- as.numeric(submit_df$BDscience) - 1
#     #submit_df <-rbind(submit_df, data.frame(bdanalytics=c(" ")))
#     print("Submission Stats:")
#     print(table(submit_df$BDscience, useNA = "ifany"))

    glb_force_prediction_lst <- list()

    # Observations forced towards class 0: probability lowered by 0.5, floored at 0.
    glb_force_prediction_lst[["0"]] <- c(11885, 11907, 11943,
                                         12050, 12115, 12171,
                                         12253, 12285, 12367, 12388, 12399,
                                         12585)
    for (obs_id in glb_force_prediction_lst[["0"]]) {
        obs_grpid <- glb_allobs_df[glb_allobs_df[, glb_id_var] == obs_id, ".grpid"]
        # An id missing from glb_allobs_df used to die with the unhelpful
        # "argument is of length zero"; report it explicitly instead.
        if (length(obs_grpid) == 0)
            stop(sprintf("forced obs_id %s not found in glb_allobs_df", obs_id))
        if (any(is.na(obs_grpid)))
            stop(".grpid is NA")
        submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] <-
            max(0, submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] - 0.5)
    }

    # Also lower by 0.5 every observation predicted "Y" whose startprice
    # exceeds 675.
    # NOTE(review): 675 looks like the highest startprice among sold training
    # items (cf. the check at the end of this block) -- confirm.
    rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)
    for (obs_id in glb_newobs_df[!is.na(glb_newobs_df[, rsp_var_out]) &
                                 (glb_newobs_df[, rsp_var_out] == "Y") &
                                 (glb_newobs_df[ , "startprice"] > 675), glb_id_var])
        submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] <-
            max(0, submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] - 0.5)

    # Observations forced towards class 1: probability raised by 0.5, capped at 0.9999.
    glb_force_prediction_lst[["1"]] <- c(11871, 11875, 11886,
                    11913, 11931, 11937, 11967, 11990, 11991, 11994, 11999,
                    12000, 12002, 12018, 12021, 12065, 12072,
                    12111, 12114, 12126, 12152, 12172,
                    12213, 12214, 12233, 12265, 12278, 12299,
                    12446, 12491,
                    12505, 12576, 12608, 12630)
    for (obs_id in glb_force_prediction_lst[["1"]]) {
        obs_grpid <- glb_allobs_df[glb_allobs_df[, glb_id_var] == obs_id, ".grpid"]
        if (length(obs_grpid) == 0)
            stop(sprintf("forced obs_id %s not found in glb_allobs_df", obs_id))
        if (any(is.na(obs_grpid)))
            stop(".grpid is NA")
        submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] <-
            min(0.9999, submit_df[submit_df[, glb_id_var] == obs_id, "Probability1"] + 0.5)
    }
} else submit_df <- glb_newobs_df[, c(glb_id_var,
                                      paste0(glb_rsp_var_out, glb_fin_mdl_id))]

# Sanity checks on the submitted probabilities. They read
# submit_df$Probability1, which is only created in the binomial branch above,
# so the guard must match that branch (it used to test glb_is_classification
# alone and would have failed for a multinomial response).
if (glb_is_classification && glb_is_binomial) {
    rsp_var_out <- paste0(glb_rsp_var_out, glb_fin_mdl_id)

    # New observations that belong to a duplicate group, joined with the
    # group-level table dupgrps_df and with the submitted probability.
    tmp_newobs_df <- subset(glb_newobs_df[, c(glb_id_var, ".grpid", rsp_var_out)],
                            !is.na(.grpid))
    tmp_newobs_df <- merge(tmp_newobs_df, dupgrps_df, by=".grpid", all.x=TRUE)
    tmp_newobs_df <- merge(tmp_newobs_df, submit_df, by=glb_id_var, all.x = TRUE)
    # Flag a row when the submitted probability falls on one side of 0.5 while
    # the group's counter for the opposite outcome is positive.
    # (presumably sold.0 / sold.1 count unsold / sold duplicates -- verify
    # against where dupgrps_df is built)
    tmp_newobs_df$.err <-
        ((tmp_newobs_df$Probability1 > 0.5) & (tmp_newobs_df$sold.0 > 0) |
         (tmp_newobs_df$Probability1 < 0.5) & (tmp_newobs_df$sold.1 > 0))
    # Order by the configured id column rather than a hard-coded "UniqueID".
    tmp_newobs_df <- orderBy(as.formula(paste0("~", glb_id_var)),
                             subset(tmp_newobs_df, .err == TRUE))
    print("Prediction errors in duplicates:")
    print(tmp_newobs_df)
    if (nrow(tmp_newobs_df) > 0)
        stop("check Prediction errors in duplicates")
    #print(dupobs_df[dupobs_df$.grpid == 26, ])

    # No observation submitted with probability >= 0.5 may have a startprice
    # above the highest startprice observed for a "Y" response.
    tmp_newobs_df <- cbind(glb_newobs_df, submit_df[, "Probability1", FALSE])
    max_sold_startprice <-
        max(glb_allobs_df[!is.na(glb_allobs_df[, glb_rsp_var]) &
                          (glb_allobs_df[, glb_rsp_var] == "Y"), "startprice"])
    if (max(glb_newobs_df[!is.na(glb_newobs_df[, rsp_var_out]) &
                          (tmp_newobs_df[, "Probability1"] >= 0.5), "startprice"]) >
        max_sold_startprice)
        # Report the bound actually used instead of a hard-coded 675.
        stop(sprintf("startprice for some +ve predictions > %s",
                     format(max_sold_startprice)))
}
## [1] "Prediction errors in duplicates:"
## [1] UniqueID .grpid
## [3] sold.fctr.predict.Final.glmnet sold.0
## [5] sold.1 sold.NA
## [7] .freq Probability1
## [9] .err
## <0 rows> (or 0-length row.names)
# Write the submission file. The name is glb_out_pfx + glb_fin_mdl_id with
# every literal "." replaced by "_" (fixed=TRUE), followed by "_submit.csv".
submit_fname <- paste0(gsub(".", "_", paste0(glb_out_pfx, glb_fin_mdl_id), fixed=TRUE),
"_submit.csv")
write.csv(submit_df, submit_fname, quote=FALSE, row.names=FALSE)
#cat(" ", "\n", file=submit_fn, append=TRUE)
# print(orderBy(~ -max.auc.OOB, glb_models_df[, c("model_id",
# "max.auc.OOB", "max.Accuracy.OOB")]))
# For each text variable, print diagnostics about its post-stemming terms.
for (txt_var in glb_txt_vars) {
# Print post-stem-words but need post-stop-words for debugging ?
print(sprintf(" All post-stem-words TfIDf terms for %s:", txt_var))
myprint_df(glb_post_stem_words_terms_df_lst[[txt_var]])
TfIdf_mtrx <- glb_post_stem_words_TfIdf_mtrx_lst[[txt_var]]
# Show the observations with a positive TfIdf value in the matrix column
# given by `pos` of the last row of the terms table.
# (assumes TfIdf_mtrx rows line up with glb_allobs_df rows -- TODO confirm)
print(glb_allobs_df[
which(TfIdf_mtrx[, tail(glb_post_stem_words_terms_df_lst[[txt_var]], 1)$pos] > 0),
c(glb_id_var, glb_txt_vars)])
# Number of terms with freq == 1.
print(nrow(subset(glb_post_stem_words_terms_df_lst[[txt_var]], freq == 1)))
#print(glb_allobs_df[which(TfIdf_mtrx[, 207] > 0), c(glb_id_var, glb_txt_vars)])
#unlist(strsplit(glb_allobs_df[2157, "description"], ""))
#glb_allobs_df[2442, c(glb_id_var, glb_txt_vars)]
#TfIdf_mtrx[2442, TfIdf_mtrx[2442, ] > 0]
print(sprintf(" Top_n post_stem_words TfIDf terms for %s:", txt_var))
tmp_df <- glb_post_stem_words_terms_df_lst[[txt_var]]
# First glb_txt_top_n[[txt_var]] terms, in the table's existing row order.
# NOTE(review): 1:n misbehaves for n == 0 (yields c(1, 0)) and yields NA
# terms when n exceeds nrow(tmp_df) -- confirm n is always within range.
top_n_vctr <- tmp_df$term[1:glb_txt_top_n[[txt_var]]]
tmp_freq1_df <- subset(tmp_df, freq == 1)
# The top-n terms are OR-ed into one regex, so a freq-1 term is flagged when
# any top-n term occurs anywhere inside it (substring match, not equality).
tmp_freq1_df$top_n <- grepl(paste0(top_n_vctr, collapse="|"), tmp_freq1_df$term)
print(subset(tmp_freq1_df, top_n == TRUE))
}
## [1] " All post-stem-words TfIDf terms for descr.my:"
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## condit 207.7156 condit 499 137 -0.0418798096 0.0418798096 82.38883
## use 144.7700 use 291 709 0.0103720246 0.0103720246 51.46753
## scratch 126.4831 scratch 286 565 -0.0088060862 0.0088060862 49.35848
## new 124.1683 new 156 429 -0.0372353149 0.0372353149 50.77429
## good 120.3335 good 197 281 -0.0004368629 0.0004368629 44.58392
## screen 105.7897 screen 213 566 0.0232373651 0.0232373651 36.89203
## TfIdf.Y TfIdf.NA
## condit 56.35056 68.97623
## use 47.66515 45.63736
## scratch 40.20165 36.92302
## new 30.11628 43.27771
## good 38.21866 37.53088
## screen 37.92265 30.97500
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## almost 14.463235 almost 12 39 0.009256155 0.009256155 4.5360969
## retail 11.143946 retail 9 551 -0.004221734 0.004221734 3.0874822
## awesom 4.076122 awesom 2 65 -0.021525023 0.021525023 2.5938956
## first 2.939748 first 2 249 -0.021525023 0.021525023 2.0751165
## headphon 2.017474 headphon 2 298 -0.021525023 0.021525023 0.8646319
## therefor 1.137558 therefor 1 670 -0.021525023 0.021525023 1.1375583
## TfIdf.Y TfIdf.NA
## almost 5.323590 4.6035482
## retail 2.312503 5.7439603
## awesom 0.000000 1.4822261
## first 0.000000 0.8646319
## headphon 0.000000 1.1528425
## therefor 0.000000 0.0000000
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## red 0.8125416 red 1 532 NA NA 0.0000000
## version 0.8125416 version 1 716 -0.02152502 0.02152502 0.8125416
## adaptor 0.7583722 adaptor 1 31 0.02500407 0.02500407 0.0000000
## divid 0.7583722 divid 1 194 0.02500407 0.02500407 0.0000000
## grey 0.7583722 grey 1 286 0.02500407 0.02500407 0.0000000
## hdmi 0.7583722 hdmi 1 297 0.02500407 0.02500407 0.0000000
## TfIdf.Y TfIdf.NA
## red 0.0000000 0.8125416
## version 0.0000000 0.0000000
## adaptor 0.7583722 0.0000000
## divid 0.7583722 0.0000000
## grey 0.7583722 0.0000000
## hdmi 0.7583722 0.0000000
## UniqueID
## 114 10114
## descr.my
## 114 comes with: grey ipad divider case, white stylus, Apple USB wall charger and USB cord. HDMI adaptor
## [1] 285
## [1] " Top_n post_stem_words TfIDf terms for descr.my:"
## TfIdf term freq pos cor.y cor.y.abs TfIdf.N
## appli 2.843896 appli 1 54 0.02500407 0.02500407 0.000000
## showroom 1.895930 showroom 1 592 NA NA 0.000000
## backlit 1.625083 backlit 1 68 -0.02152502 0.02152502 1.625083
## paperwork 1.421948 paperwork 1 464 NA NA 0.000000
## seen 1.421948 seen 1 575 -0.02152502 0.02152502 1.421948
## backlight 1.263954 backlight 1 67 0.02500407 0.02500407 0.000000
## seem 1.137558 seem 1 574 NA NA 0.000000
## TfIdf.Y TfIdf.NA top_n
## appli 2.843896 0.000000 TRUE
## showroom 0.000000 1.895930 TRUE
## backlit 0.000000 0.000000 TRUE
## paperwork 0.000000 1.421948 TRUE
## seen 0.000000 0.000000 TRUE
## backlight 1.263954 0.000000 TRUE
## seem 0.000000 1.137558 TRUE
# For binomial classification, show the OOB-optimal probability threshold
# recorded in glb_models_df for the selected model.
if (glb_is_classification && glb_is_binomial)
print(glb_models_df[glb_models_df$model_id == glb_sel_mdl_id,
"opt.prob.threshold.OOB"])
## [1] 0.5
print(sprintf("glb_sel_mdl_id: %s", glb_sel_mdl_id))
## [1] "glb_sel_mdl_id: Ensemble.glmnet"
print(sprintf("glb_fin_mdl_id: %s", glb_fin_mdl_id))
## [1] "glb_fin_mdl_id: Final.glmnet"
print(dim(glb_fitobs_df))
## [1] 969 201
print(dsp_models_df)
## model_id max.Accuracy.OOB max.auc.OOB
## 18 All.Interact.X.no.rnorm.rf 0.8483146 0.9142644
## 13 All.X.no.rnorm.rf 0.8438202 0.9180131
## 5 Max.cor.Y.rpart 0.8426966 0.8469855
## 12 All.X.no.rnorm.rpart 0.8426966 0.8469855
## 17 All.Interact.X.no.rnorm.rpart 0.8426966 0.8469855
## 16 All.Interact.X.glmnet 0.8359551 0.8742088
## 6 Max.cor.Y.glm 0.8348315 0.8659702
## 11 All.X.glmnet 0.8325843 0.8560007
## 4 Max.cor.Y.cv.0.cp.0.rpart 0.8202247 0.8997924
## 15 All.Interact.X.bayesglm 0.8179775 0.8660362
## 7 Interact.High.cor.Y.glm 0.8146067 0.8576352
## 10 All.X.bayesglm 0.7842697 0.8427064
## 8 Low.cor.X.glm 0.7786517 0.8382546
## 9 All.X.glm 0.7741573 0.8308232
## 14 All.Interact.X.glm 0.6797753 0.6856640
## 1 MFO.myMFO_classfr 0.5359551 0.5000000
## 3 Max.cor.Y.cv.0.rpart 0.5359551 0.5000000
## 2 Random.myrandom_classfr 0.4640449 0.5185354
## max.Kappa.OOB min.aic.fit opt.prob.threshold.OOB
## 18 0.6930078 NA 0.6
## 13 0.6854548 NA 0.5
## 5 0.6791719 NA 0.9
## 12 0.6791719 NA 0.9
## 17 0.6791719 NA 0.9
## 16 0.6661923 NA 0.6
## 6 0.6639612 883.4623 0.7
## 11 0.6580401 NA 0.7
## 4 0.6403332 NA 0.3
## 15 0.6319103 1164.3831 0.5
## 7 0.6240496 887.8417 0.6
## 10 0.5654496 1056.6761 0.5
## 8 0.5546405 914.1270 0.5
## 9 0.5454499 931.5575 0.5
## 14 0.3658021 14993.8106 0.9
## 1 0.0000000 NA 0.5
## 3 0.0000000 NA 0.5
## 2 0.0000000 NA 0.4
# Regression reporting: OOB RMSE of the selected model, per-category OOB error
# summary, and -- when the new observations carry a complete response --
# prediction statistics of the final model on them.
if (glb_is_regression) {
print(sprintf("%s OOB RMSE: %0.4f", glb_sel_mdl_id,
glb_models_df[glb_models_df$model_id == glb_sel_mdl_id, "min.RMSE.OOB"]))
if (!is.null(glb_category_var)) {
tmp_OOBobs_df <- glb_OOBobs_df[, c(glb_category_var, glb_rsp_var,
predct_error_var_name)]
# The last column (the prediction-error column) is renamed for the summary.
names(tmp_OOBobs_df)[length(names(tmp_OOBobs_df))] <- "error.abs.OOB"
sOOB_ctgry_df <- dplyr::group_by_(tmp_OOBobs_df, glb_category_var)
# NOTE(review): dplyr::count() interprets its `...` arguments as variables to
# count by, not as summary expressions -- confirm this yields the intended
# per-category sums/means (summarise() with n() may have been meant).
# NOTE(review): `startprice` is referenced by name, but tmp_OOBobs_df only
# keeps glb_category_var, glb_rsp_var and the error column, so this works only
# if one of those is "startprice".
sOOB_ctgry_df <- dplyr::count(sOOB_ctgry_df,
startprice.OOB.sum = sum(startprice),
err.abs.OOB.sum = sum(error.abs.OOB),
err.abs.OOB.mean = mean(error.abs.OOB))
# NOTE(review): renames the 4th column by position; verify it is the count
# column and not err.abs.OOB.mean, which the orderBy() below relies on.
names(sOOB_ctgry_df)[4] <- ".n.OOB"
sOOB_ctgry_df <- dplyr::ungroup(sOOB_ctgry_df)
#intersect(names(glb_ctgry_df), names(sOOB_ctgry_df))
# Full outer merge on the columns the two tables share.
glb_ctgry_df <- merge(glb_ctgry_df, sOOB_ctgry_df, all=TRUE)
print(orderBy(~-err.abs.OOB.mean, glb_ctgry_df))
}
# Only when the response is present and fully observed in glb_newobs_df.
if ((glb_rsp_var %in% names(glb_newobs_df)) &&
!(any(is.na(glb_newobs_df[, glb_rsp_var])))) {
pred_stats_df <-
mypredict_mdl(mdl=glb_models_lst[[glb_fin_mdl_id]],
df=glb_newobs_df,
rsp_var=glb_rsp_var,
rsp_var_out=glb_rsp_var_out,
model_id_method=glb_fin_mdl_id,
label="new",
model_summaryFunction=glb_sel_mdl$control$summaryFunction,
model_metric=glb_sel_mdl$metric,
model_metric_maximize=glb_sel_mdl$maximize,
ret_type="stats")
print(sprintf("%s prediction stats for glb_newobs_df:", glb_fin_mdl_id))
print(pred_stats_df)
}
}
# Classification reporting: OOB confusion matrix of the selected model,
# per-category OOB accuracy, and -- when the new observations carry a complete
# response -- the final model's confusion matrix on them.
if (glb_is_classification) {
print(sprintf("%s OOB confusion matrix & accuracy: ", glb_sel_mdl_id))
# t(): the confusion table is transposed before printing.
print(t(confusionMatrix(glb_OOBobs_df[, paste0(glb_rsp_var_out, glb_sel_mdl_id)],
glb_OOBobs_df[, glb_rsp_var])$table))
if (!is.null(glb_category_var)) {
tmp_OOBobs_df <- glb_OOBobs_df[, c(glb_category_var, predct_accurate_var_name)]
# The last column (the accuracy flag) is renamed for the cross-tab.
names(tmp_OOBobs_df)[length(names(tmp_OOBobs_df))] <- "accurate.OOB"
aOOB_ctgry_df <- mycreate_xtab_df(tmp_OOBobs_df, names(tmp_OOBobs_df))
# Missing cells of the cross-tab are treated as zero counts.
aOOB_ctgry_df[is.na(aOOB_ctgry_df)] <- 0
aOOB_ctgry_df <- mutate(aOOB_ctgry_df,
.n.OOB = accurate.OOB.FALSE + accurate.OOB.TRUE,
max.accuracy.OOB = accurate.OOB.TRUE / .n.OOB)
#intersect(names(glb_ctgry_df), names(aOOB_ctgry_df))
# Full outer merge on the columns the two tables share; categories with the
# most inaccurate OOB predictions are printed first.
glb_ctgry_df <- merge(glb_ctgry_df, aOOB_ctgry_df, all=TRUE)
print(orderBy(~-accurate.OOB.FALSE, glb_ctgry_df))
# Inaccurately predicted OOB observations for one hard-coded product line
# ("iPadAir").
print(glb_OOBobs_df[(glb_OOBobs_df$prdline.my == "iPadAir") &
!(glb_OOBobs_df[, predct_accurate_var_name]),
c(glb_id_var, glb_rsp_var_raw,
#"description"
"biddable", "startprice", "condition"
)])
}
# Only when the response is present and fully observed in glb_newobs_df.
if ((glb_rsp_var %in% names(glb_newobs_df)) &&
!(any(is.na(glb_newobs_df[, glb_rsp_var])))) {
print(sprintf("%s new confusion matrix & accuracy: ", glb_fin_mdl_id))
print(t(confusionMatrix(glb_newobs_df[, paste0(glb_rsp_var_out, glb_fin_mdl_id)],
glb_newobs_df[, glb_rsp_var])$table))
}
}
## [1] "Ensemble.glmnet OOB confusion matrix & accuracy: "
## Prediction
## Reference N Y
## N 415 62
## Y 75 338
## prdl.my.descr.fctr .n.OOB .n.Tst .freqRatio.Tst .freqRatio.OOB
## 5 iPad 2#0 93 83 0.10401003 0.10449438
## 9 iPadAir#0 98 88 0.11027569 0.11011236
## 11 iPadmini 2+#0 71 64 0.08020050 0.07977528
## 6 iPad 2#1 79 71 0.08897243 0.08876404
## 3 iPad 1#0 52 46 0.05764411 0.05842697
## 8 iPad 3+#1 71 64 0.08020050 0.07977528
## 14 iPadmini#1 54 49 0.06140351 0.06067416
## 1 Unknown#0 50 45 0.05639098 0.05617978
## 4 iPad 1#1 48 43 0.05388471 0.05393258
## 7 iPad 3+#0 66 59 0.07393484 0.07415730
## 13 iPadmini#0 73 65 0.08145363 0.08202247
## 2 Unknown#1 47 42 0.05263158 0.05280899
## 10 iPadAir#1 54 49 0.06140351 0.06067416
## 12 iPadmini 2+#1 34 30 0.03759398 0.03820225
## accurate.OOB.FALSE accurate.OOB.TRUE max.accuracy.OOB
## 5 20 73 0.7849462
## 9 16 82 0.8367347
## 11 14 57 0.8028169
## 6 11 68 0.8607595
## 3 10 42 0.8076923
## 8 10 61 0.8591549
## 14 9 45 0.8333333
## 1 8 42 0.8400000
## 4 8 40 0.8333333
## 7 8 58 0.8787879
## 13 8 65 0.8904110
## 2 7 40 0.8510638
## 10 4 50 0.9259259
## 12 4 30 0.8823529
## UniqueID sold biddable startprice condition
## 1156 11156 0 1 299.99 New
## 1562 11563 0 0 300.00 Used
## 19 10019 1 0 375.00 Used
## 51 10051 1 0 614.99 New
## 109 10109 1 0 339.99 New
## 277 10277 1 0 300.00 Used
## 297 10297 1 1 490.00 New
## 535 10535 1 1 380.00 New
## 1059 11059 1 0 500.00 New
## 1132 11132 1 0 339.00 Used
## 1200 11200 1 0 379.99 Used
## 1212 11212 1 0 450.00 New
## 1218 11218 1 0 349.99 Used
## 1353 11354 1 0 300.00 Used
## 1381 11382 1 0 439.99 New
## 1604 11605 1 0 229.00 For parts or not working
## 353 10353 1 0 292.50 Used
## 436 10436 1 0 500.00 Used
## 675 10675 1 0 280.00 Used
## 794 10794 1 1 525.00 Used
# Print OOB diagnostics of the selected model for one value of `myCategory`:
# the (transposed) confusion matrix, the share of accurate predictions, and
# the misclassified observations split by Popular == 1 ("FN") and
# Popular == 0 ("FP").
#
# Args:
#   myCategory: value of glb_OOBobs_df$myCategory to report on.
#
# Reads the globals glb_OOBobs_df, glb_sel_mdl_id, glb_rsp_var,
# glb_rsp_var_out, glb_id_var and predct_accurate_var_name.
# Returns (invisibly) the FP data frame, i.e. the value of the last print().
#
# NOTE(review): the myCategory / Popular / Headline / Snippet / Abstract
# columns are hard-coded here; confirm they exist in glb_OOBobs_df before
# calling (every call in this file is commented out).
dsp_myCategory_conf_mtrx <- function(myCategory) {
    # Compute the category subset once. %in% (rather than ==) keeps rows with
    # a missing myCategory out of the subset instead of injecting all-NA rows.
    in_ctgry <- glb_OOBobs_df$myCategory %in% myCategory
    ctgry_obs_df <- glb_OOBobs_df[in_ctgry, , drop=FALSE]

    print(sprintf("%s OOB::myCategory=%s confusion matrix & accuracy: ",
                  glb_sel_mdl_id, myCategory))
    print(t(confusionMatrix(
        ctgry_obs_df[, paste0(glb_rsp_var_out, glb_sel_mdl_id)],
        ctgry_obs_df[, glb_rsp_var])$table))
    print(sum(ctgry_obs_df[, predct_accurate_var_name]) / nrow(ctgry_obs_df))

    # Ids of the inaccurately predicted observations in this category.
    err_ids <- ctgry_obs_df[!ctgry_obs_df[, predct_accurate_var_name], glb_id_var]
    # Match on the configured id column; this used to hard-code `UniqueID`
    # although err_ids is extracted via glb_id_var.
    is_err <- glb_OOBobs_df[, glb_id_var] %in% err_ids
    dsp_cols <- c(".clusterid",
                  "Popular", "Headline", "Snippet", "Abstract")

    OOB_FNerr_df <- glb_OOBobs_df[is_err & (glb_OOBobs_df$Popular == 1), dsp_cols]
    print(sprintf("%s OOB::myCategory=%s FN errors: %d", glb_sel_mdl_id, myCategory,
                  nrow(OOB_FNerr_df)))
    print(OOB_FNerr_df)

    OOB_FPerr_df <- glb_OOBobs_df[is_err & (glb_OOBobs_df$Popular == 0), dsp_cols]
    print(sprintf("%s OOB::myCategory=%s FP errors: %d", glb_sel_mdl_id, myCategory,
                  nrow(OOB_FPerr_df)))
    print(OOB_FPerr_df)
}
#dsp_myCategory_conf_mtrx(myCategory="OpEd#Opinion#")
#dsp_myCategory_conf_mtrx(myCategory="Business#Business Day#Dealbook")
#dsp_myCategory_conf_mtrx(myCategory="##")
# if (glb_is_classification) {
# print("FN_OOB_ids:")
# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids,
# grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])
# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids,
# glb_txt_vars])
# print(dsp_vctr <- colSums(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids,
# setdiff(grep("[HSA].", names(glb_OOBobs_df), value=TRUE),
# union(myfind_chr_cols_df(glb_OOBobs_df),
# grep(".fctr", names(glb_OOBobs_df), fixed=TRUE, value=TRUE)))]))
# }
# Print what is known about the observations whose Headline.pfx is in
# `hdlpfx`: the response-related columns in the OOB and new observations,
# the column sums of the selected feature columns over the matching new
# observations, and those observations restricted to the non-zero-sum
# features plus the character columns.
#
# Args:
#   hdlpfx: one or more Headline.pfx values to match.
#
# Reads the globals glb_OOBobs_df, glb_newobs_df and glb_rsp_var, and calls
# myfind_chr_cols_df().
# Returns (invisibly) the value of the last print(), i.e. the final subset of
# glb_newobs_df.
dsp_hdlpfx_results <- function(hdlpfx) {
    print(hdlpfx)
    print(glb_OOBobs_df[glb_OOBobs_df$Headline.pfx %in% c(hdlpfx),
                        grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])

    new_rows <- glb_newobs_df$Headline.pfx %in% c(hdlpfx)
    print(glb_newobs_df[new_rows,
                        grep(glb_rsp_var, names(glb_newobs_df), value=TRUE)])

    # Columns whose name contains H., S. or A. (literal dot), excluding the
    # character columns and anything with ".fctr" in its name.
    chr_cols <- myfind_chr_cols_df(glb_newobs_df)
    sum_cols <- setdiff(grep("[HSA]\\.", names(glb_newobs_df), value=TRUE),
                        union(chr_cols,
                              grep(".fctr", names(glb_newobs_df), fixed=TRUE,
                                   value=TRUE)))
    # drop=FALSE: with exactly one selected column `[` would return a bare
    # vector and colSums() would fail ("'x' must be an array of at least two
    # dimensions").
    print(dsp_vctr <- colSums(glb_newobs_df[new_rows, sum_cols, drop=FALSE]))
    print(dsp_vctr <- dsp_vctr[dsp_vctr != 0])
    print(glb_newobs_df[new_rows, union(names(dsp_vctr), chr_cols)])
}
#dsp_hdlpfx_results(hdlpfx="Ask Well::")
# print("myMisc::|OpEd|blank|blank|1:")
# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% c(6446),
# grep(glb_rsp_var, names(glb_OOBobs_df), value=TRUE)])
# print(glb_OOBobs_df[glb_OOBobs_df$UniqueID %in% FN_OOB_ids,
# c("WordCount", "WordCount.log", "myMultimedia",
# "NewsDesk", "SectionName", "SubsectionName")])
# print(mycreate_sqlxtab_df(glb_allobs_df[sel_obs(Headline.contains="[Vv]ideo"), ],
# c(glb_rsp_var, "myMultimedia")))
# dsp_chisq.test(Headline.contains="[Vi]deo")
# print(glb_allobs_df[sel_obs(Headline.contains="[Vv]ideo"),
# c(glb_rsp_var, "Popular", "myMultimedia", "Headline")])
# print(glb_allobs_df[sel_obs(Headline.contains="[Ee]bola", Popular=1),
# c(glb_rsp_var, "Popular", "myMultimedia", "Headline",
# "NewsDesk", "SectionName", "SubsectionName")])
# print(subset(glb_feats_df, !is.na(importance))[,
# c("is.ConditionalX.y",
# grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, is.ConditionalX.y & is.na(importance))[,
# c("is.ConditionalX.y",
# grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, !is.na(importance))[,
# c("zeroVar", "nzv", "myNearZV",
# grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
# print(subset(glb_feats_df, is.na(importance))[,
# c("zeroVar", "nzv", "myNearZV",
# grep("importance", names(glb_feats_df), fixed=TRUE, value=TRUE))])
print(orderBy(as.formula(paste0("~ -", glb_sel_mdl_id, ".importance")), glb_featsimp_df))
## Ensemble.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob 100.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob 98.138116
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob 7.031892
## sold.fctr.predict.All.Interact.X.bayesglm.prob 4.587832
## sold.fctr.predict.All.Interact.X.glm.prob 0.000000
## sold.fctr.predict.All.Interact.X.glmnet.prob 0.000000
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.All.X.bayesglm.prob 0.000000
## sold.fctr.predict.All.X.glm.prob 0.000000
## sold.fctr.predict.All.X.glmnet.prob 0.000000
## sold.fctr.predict.All.X.no.rnorm.rpart.prob 0.000000
## sold.fctr.predict.Interact.High.cor.Y.glm.prob 0.000000
## sold.fctr.predict.Low.cor.X.glm.prob 0.000000
## sold.fctr.predict.Max.cor.Y.glm.prob 0.000000
## sold.fctr.predict.Max.cor.Y.rpart.prob 0.000000
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob NA
## importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob NA
## sold.fctr.predict.All.Interact.X.glm.prob NA
## sold.fctr.predict.All.Interact.X.glmnet.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.All.X.bayesglm.prob NA
## sold.fctr.predict.All.X.glm.prob NA
## sold.fctr.predict.All.X.glmnet.prob NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob NA
## sold.fctr.predict.Low.cor.X.glm.prob NA
## sold.fctr.predict.Max.cor.Y.glm.prob NA
## sold.fctr.predict.Max.cor.Y.rpart.prob NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 0.00000
## Final.glmnet.importance
## sold.fctr.predict.All.X.no.rnorm.rf.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rf.prob NA
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.rpart.prob NA
## sold.fctr.predict.All.Interact.X.bayesglm.prob NA
## sold.fctr.predict.All.Interact.X.glm.prob NA
## sold.fctr.predict.All.Interact.X.glmnet.prob NA
## sold.fctr.predict.All.Interact.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.All.X.bayesglm.prob NA
## sold.fctr.predict.All.X.glm.prob NA
## sold.fctr.predict.All.X.glmnet.prob NA
## sold.fctr.predict.All.X.no.rnorm.rpart.prob NA
## sold.fctr.predict.Interact.High.cor.Y.glm.prob NA
## sold.fctr.predict.Low.cor.X.glm.prob NA
## sold.fctr.predict.Max.cor.Y.glm.prob NA
## sold.fctr.predict.Max.cor.Y.rpart.prob NA
## sold.fctr.predict.All.X.no.rnorm.Train.rf.prob 100.00000
## sold.fctr.predict.All.Interact.X.no.rnorm.Train.rf.prob 99.24746
## sold.fctr.predict.Max.cor.Y.cv.0.cp.0.Train.rpart.prob 0.00000
print("glb_newobs_df prediction stats:")
## [1] "glb_newobs_df prediction stats:"
# Histogram of the final model's prediction column on the new observations;
# for classification, also tabulate the predicted values.
print(myplot_histogram(glb_newobs_df, paste0(glb_rsp_var_out, glb_fin_mdl_id)))
if (glb_is_classification)
print(table(glb_newobs_df[, paste0(glb_rsp_var_out, glb_fin_mdl_id)]))
##
## N Y
## 557 241
# players_df <- data.frame(id=c("Chavez", "Giambi", "Menechino", "Myers", "Pena"),
# OBP=c(0.338, 0.391, 0.369, 0.313, 0.361),
# SLG=c(0.540, 0.450, 0.374, 0.447, 0.500),
# cost=c(1400000, 1065000, 295000, 800000, 300000))
# players_df$RS.predict <- predict(glb_models_lst[[csm_mdl_id]], players_df)
# print(orderBy(~ -RS.predict, players_df))
# Second synchronisation pass: report, and where applicable copy into
# glb_allobs_df, any columns that exist only in the partition data frames.
# `diff` is assigned inside each condition and reused by the print() below it.
if (length(diff <- setdiff(names(glb_trnobs_df), names(glb_allobs_df))) > 0)
print(diff)
# Train-only columns -> rows of glb_allobs_df with .src == "Train".
for (col in setdiff(names(glb_trnobs_df), names(glb_allobs_df)))
# Merge or cbind ?
# NOTE(review): positional assignment -- assumes glb_trnobs_df rows line up
# with the "Train" rows of glb_allobs_df; confirm.
glb_allobs_df[glb_allobs_df$.src == "Train", col] <- glb_trnobs_df[, col]
# glb_fitobs_df differences are only reported, not copied.
if (length(diff <- setdiff(names(glb_fitobs_df), names(glb_allobs_df))) > 0)
print(diff)
if (length(diff <- setdiff(names(glb_OOBobs_df), names(glb_allobs_df))) > 0)
print(diff)
# OOB-only columns -> rows of glb_allobs_df with .lcn == "OOB".
for (col in setdiff(names(glb_OOBobs_df), names(glb_allobs_df)))
# Merge or cbind ?
glb_allobs_df[glb_allobs_df$.lcn == "OOB", col] <- glb_OOBobs_df[, col]
# glb_newobs_df differences are only reported, not copied.
if (length(diff <- setdiff(names(glb_newobs_df), names(glb_allobs_df))) > 0)
print(diff)
# When glb_save_envir is set, snapshot the feature table, the combined
# observations and the model objects to "<glb_out_pfx>prdnew_dsk.RData".
if (glb_save_envir)
save(glb_feats_df, glb_allobs_df,
#glb_trnobs_df, glb_fitobs_df, glb_OOBobs_df, glb_newobs_df,
glb_models_df, dsp_models_df, glb_models_lst, glb_model_type,
glb_sel_mdl, glb_sel_mdl_id,
glb_fin_mdl, glb_fin_mdl_id,
file=paste0(glb_out_pfx, "prdnew_dsk.RData"))
# Drop the temporaries created while building the submission and the
# per-category report.
# NOTE(review): tmp_OOBobs_df is only created when glb_category_var is
# non-NULL; otherwise rm() will warn that the object is not found -- confirm.
rm(submit_df, tmp_OOBobs_df)
# tmp_replay_lst <- replay.petrisim(pn=glb_analytics_pn,
# replay.trans=(glb_analytics_avl_objs <- c(glb_analytics_avl_objs,
# "data.new.prediction")), flip_coord=TRUE)
# print(ggplot.petrinet(tmp_replay_lst[["pn"]]) + coord_flip())
# Record the "display.session.info" step in the chunk log (glb_chunks_df).
# major.inc=TRUE: in the log printed below this step gets step_major 10,
# following predict.data.new's 9 -- presumably the flag bumps the major step
# counter; confirm against myadd_chunk's definition.
glb_chunks_df <- myadd_chunk(glb_chunks_df, "display.session.info", major.inc=TRUE)
## label step_major step_minor bgn end elapsed
## 16 predict.data.new 9 0 425.622 434.567 8.945
## 17 display.session.info 10 0 434.568 NA NA
Null Hypothesis (\(\sf{H_{0}}\)): mpg is not impacted by am_fctr.
The variance by am_fctr appears to be independent.
#{r q1, cache=FALSE}
# print(t.test(subset(cars_df, am_fctr == "automatic")$mpg,
#              subset(cars_df, am_fctr == "manual")$mpg,
#              var.equal=FALSE)$conf)
# We reject the null hypothesis, i.e. we have evidence to conclude that am_fctr impacts mpg (95% confidence). Manual transmission gives better miles per gallon than automatic transmission.
## label step_major step_minor bgn end elapsed
## 11 fit.models 7 1 92.533 236.654 144.122
## 14 fit.data.training 8 0 276.638 419.196 142.558
## 5 extract.features 3 0 17.868 53.117 35.249
## 12 fit.models 7 2 236.655 268.982 32.327
## 10 fit.models 7 0 67.216 92.532 25.317
## 16 predict.data.new 9 0 425.622 434.567 8.945
## 13 fit.models 7 3 268.982 276.637 7.655
## 15 fit.data.training 8 1 419.196 425.621 6.426
## 8 select.features 5 0 60.320 66.001 5.681
## 7 manage.missing.data 4 1 54.646 60.319 5.673
## 2 inspect.data 2 0 12.560 16.480 3.920
## 1 import.data 1 0 8.653 12.560 3.907
## 6 cluster.data 4 0 53.117 54.646 1.529
## 9 partition.data.training 6 0 66.001 67.215 1.214
## 3 scrub.data 2 1 16.480 17.229 0.749
## 4 transform.data 2 2 17.229 17.867 0.638
## duration
## 11 144.121
## 14 142.558
## 5 35.249
## 12 32.327
## 10 25.316
## 16 8.945
## 13 7.655
## 15 6.425
## 8 5.681
## 7 5.673
## 2 3.920
## 1 3.907
## 6 1.529
## 9 1.214
## 3 0.749
## 4 0.638
## [1] "Total Elapsed Time: 434.567 secs"
## R version 3.2.1 (2015-06-18)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: OS X 10.10.4 (Yosemite)
##
## locale:
## [1] C/en_US.UTF-8/C/C/C/en_US.UTF-8
##
## attached base packages:
## [1] tcltk grid parallel stats graphics grDevices utils
## [8] datasets methods base
##
## other attached packages:
## [1] randomForest_4.6-10 glmnet_2.0-2 arm_1.8-6
## [4] lme4_1.1-8 Matrix_1.2-2 MASS_7.3-43
## [7] rpart.plot_1.5.2 rpart_4.1-10 ROCR_1.0-7
## [10] gplots_2.17.0 sampling_2.7 entropy_1.2.1
## [13] dynamicTreeCut_1.62 proxy_0.4-15 tidyr_0.2.0
## [16] tm_0.6-2 NLP_0.1-8 stringr_1.0.0
## [19] dplyr_0.4.2 plyr_1.8.3 sqldf_0.4-10
## [22] RSQLite_1.0.0 DBI_0.3.1 gsubfn_0.6-6
## [25] proto_0.3-10 reshape2_1.4.1 gdata_2.17.0
## [28] doMC_1.3.3 iterators_1.0.7 foreach_1.4.2
## [31] doBy_4.5-13 survival_2.38-3 caret_6.0-52
## [34] ggplot2_1.0.1 lattice_0.20-33
##
## loaded via a namespace (and not attached):
## [1] splines_3.2.1 gtools_3.5.0 assertthat_0.1
## [4] stats4_3.2.1 yaml_2.1.13 slam_0.1-32
## [7] quantreg_5.11 chron_2.3-47 digest_0.6.8
## [10] RColorBrewer_1.1-2 minqa_1.2.4 colorspace_1.2-6
## [13] htmltools_0.2.6 lpSolve_5.6.11 BradleyTerry2_1.0-6
## [16] SparseM_1.6 scales_0.2.5 brglm_0.5-9
## [19] mgcv_1.8-7 car_2.0-25 nnet_7.3-10
## [22] lazyeval_0.1.10 pbkrtest_0.4-2 magrittr_1.5
## [25] evaluate_0.7 nlme_3.1-121 class_7.3-13
## [28] tools_3.2.1 formatR_1.2 munsell_0.4.2
## [31] compiler_3.2.1 e1071_1.6-6 caTools_1.17.1
## [34] nloptr_1.0.4 bitops_1.0-6 labeling_0.3
## [37] rmarkdown_0.7 gtable_0.1.2 codetools_0.2-14
## [40] abind_1.4-3 R6_2.1.0 knitr_1.10.5
## [43] KernSmooth_2.23-15 stringi_0.5-5 Rcpp_0.12.0
## [46] coda_0.17-1